/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <stdint.h>
#include <stdbool.h>
#include <linux/virtio_net.h>

#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_net.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_vhost.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_sctp.h>
#include <rte_arp.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
#include <rte_vhost_async.h>

#include "iotlb.h"
#include "vhost.h"
|
2014-02-10 13:57:48 +00:00
|
|
|
|
2017-09-08 20:50:46 +08:00
|
|
|
#define MAX_BATCH_LEN 256
|
|
|
|
|
2020-07-07 13:07:09 +08:00
|
|
|
#define VHOST_ASYNC_BATCH_THRESHOLD 32
|
|
|
|
|
2018-05-29 11:45:13 +02:00
|
|
|
static __rte_always_inline bool
rxvq_is_mergeable(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
}

static __rte_always_inline bool
virtio_net_is_inorder(struct virtio_net *dev)
{
	return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
}

static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}

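/*
 * Flush the per-virtqueue batch_copy_elems array: perform the deferred
 * small copies queued by the enqueue path and log them for live migration.
 * The dequeue variant below performs the copies only, as no dirty-page
 * logging is needed in that direction.
 */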
static inline void
do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	struct batch_copy_elem *elem = vq->batch_copy_elems;
	uint16_t count = vq->batch_copy_nb_elems;
	int i;

	for (i = 0; i < count; i++) {
		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
		vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
				elem[i].len);
		PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
	}

	vq->batch_copy_nb_elems = 0;
}

static inline void
do_data_copy_dequeue(struct vhost_virtqueue *vq)
{
	struct batch_copy_elem *elem = vq->batch_copy_elems;
	uint16_t count = vq->batch_copy_nb_elems;
	int i;

	for (i = 0; i < count; i++)
		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);

	vq->batch_copy_nb_elems = 0;
}

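/*
 * Helpers for the split virtqueue shadow used ring: descriptors completed
 * by vhost are first accumulated in shadow_used_split and then written back
 * to the guest-visible used ring in one or two contiguous copies (two when
 * the update wraps around the end of the ring).
 */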
static __rte_always_inline void
do_flush_shadow_used_ring_split(struct virtio_net *dev,
			struct vhost_virtqueue *vq,
			uint16_t to, uint16_t from, uint16_t size)
{
	rte_memcpy(&vq->used->ring[to],
			&vq->shadow_used_split[from],
			size * sizeof(struct vring_used_elem));
	vhost_log_cache_used_vring(dev, vq,
			offsetof(struct vring_used, ring[to]),
			size * sizeof(struct vring_used_elem));
}

static __rte_always_inline void
flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	uint16_t used_idx = vq->last_used_idx & (vq->size - 1);

	if (used_idx + vq->shadow_used_idx <= vq->size) {
		do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
					vq->shadow_used_idx);
	} else {
		uint16_t size;

		/* update used ring interval [used_idx, vq->size] */
		size = vq->size - used_idx;
		do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);

		/* update the left half used ring interval [0, left_size] */
		do_flush_shadow_used_ring_split(dev, vq, 0, size,
					vq->shadow_used_idx - size);
	}
	vq->last_used_idx += vq->shadow_used_idx;

	vhost_log_cache_sync(dev, vq);

	__atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
			__ATOMIC_RELEASE);
	vq->shadow_used_idx = 0;
	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
			sizeof(vq->used->idx));
}

static __rte_always_inline void
update_shadow_used_ring_split(struct vhost_virtqueue *vq,
			uint16_t desc_idx, uint32_t len)
{
	uint16_t i = vq->shadow_used_idx++;

	vq->shadow_used_split[i].id = desc_idx;
	vq->shadow_used_split[i].len = len;
}

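/*
 * Write back the shadowed used descriptors of a packed virtqueue for the
 * enqueue path. IDs and lengths are stored first; the descriptor flags,
 * which hand ownership back to the guest, are written after a release
 * fence, and the flags of the head descriptor are written last.
 */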
static __rte_always_inline void
vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq)
{
	int i;
	uint16_t used_idx = vq->last_used_idx;
	uint16_t head_idx = vq->last_used_idx;
	uint16_t head_flags = 0;

	/* Split loop in two to save memory barriers */
	for (i = 0; i < vq->shadow_used_idx; i++) {
		vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
		vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;

		used_idx += vq->shadow_used_packed[i].count;
		if (used_idx >= vq->size)
			used_idx -= vq->size;
	}

	/* The ordering for storing desc flags needs to be enforced. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	for (i = 0; i < vq->shadow_used_idx; i++) {
		uint16_t flags;

		if (vq->shadow_used_packed[i].len)
			flags = VRING_DESC_F_WRITE;
		else
			flags = 0;

		if (vq->used_wrap_counter) {
			flags |= VRING_DESC_F_USED;
			flags |= VRING_DESC_F_AVAIL;
		} else {
			flags &= ~VRING_DESC_F_USED;
			flags &= ~VRING_DESC_F_AVAIL;
		}

		if (i > 0) {
			vq->desc_packed[vq->last_used_idx].flags = flags;

			vhost_log_cache_used_vring(dev, vq,
					vq->last_used_idx *
					sizeof(struct vring_packed_desc),
					sizeof(struct vring_packed_desc));
		} else {
			head_idx = vq->last_used_idx;
			head_flags = flags;
		}

		vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);
	}

	vq->desc_packed[head_idx].flags = head_flags;

	vhost_log_cache_used_vring(dev, vq,
				head_idx *
				sizeof(struct vring_packed_desc),
				sizeof(struct vring_packed_desc));

	vq->shadow_used_idx = 0;
	vhost_log_cache_sync(dev, vq);
}

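/*
 * Dequeue-path counterpart: only the first shadowed element is written back
 * here, with an atomic release store on the descriptor flags acting as the
 * synchronization point towards the guest driver.
 */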
static __rte_always_inline void
vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq)
{
	struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];

	vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
	/* desc flags is the synchronization point for virtio packed vring */
	__atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
			used_elem->flags, __ATOMIC_RELEASE);

	vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
				sizeof(struct vring_packed_desc),
				sizeof(struct vring_packed_desc));
	vq->shadow_used_idx = 0;
	vhost_log_cache_sync(dev, vq);
}

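/*
 * Batched enqueue completion for the packed ring: PACKED_BATCH_SIZE
 * descriptors are marked used at once, with a release fence between the
 * id/len stores and the flag stores.
 */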
static __rte_always_inline void
vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq,
				uint64_t *lens,
				uint16_t *ids)
{
	uint16_t i;
	uint16_t flags;
	uint16_t last_used_idx;
	struct vring_packed_desc *desc_base;

	last_used_idx = vq->last_used_idx;
	desc_base = &vq->desc_packed[last_used_idx];

	flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		desc_base[i].id = ids[i];
		desc_base[i].len = lens[i];
	}

	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		desc_base[i].flags = flags;
	}

	vhost_log_cache_used_vring(dev, vq, last_used_idx *
				sizeof(struct vring_packed_desc),
				sizeof(struct vring_packed_desc) *
				PACKED_BATCH_SIZE);
	vhost_log_cache_sync(dev, vq);

	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
}

static __rte_always_inline void
vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
					uint16_t id)
{
	vq->shadow_used_packed[0].id = id;

	if (!vq->shadow_used_idx) {
		vq->shadow_last_used_idx = vq->last_used_idx;
		vq->shadow_used_packed[0].flags =
			PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
		vq->shadow_used_packed[0].len = 0;
		vq->shadow_used_packed[0].count = 1;
		vq->shadow_used_idx++;
	}

	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
}

static __rte_always_inline void
vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq,
				uint16_t *ids)
{
	uint16_t flags;
	uint16_t i;
	uint16_t begin;

	flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);

	if (!vq->shadow_used_idx) {
		vq->shadow_last_used_idx = vq->last_used_idx;
		vq->shadow_used_packed[0].id = ids[0];
		vq->shadow_used_packed[0].len = 0;
		vq->shadow_used_packed[0].count = 1;
		vq->shadow_used_packed[0].flags = flags;
		vq->shadow_used_idx++;
		begin = 1;
	} else
		begin = 0;

	vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
		vq->desc_packed[vq->last_used_idx + i].id = ids[i];
		vq->desc_packed[vq->last_used_idx + i].len = 0;
	}

	rte_atomic_thread_fence(__ATOMIC_RELEASE);
	vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
		vq->desc_packed[vq->last_used_idx + i].flags = flags;

	vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
				sizeof(struct vring_packed_desc),
				sizeof(struct vring_packed_desc) *
				PACKED_BATCH_SIZE);
	vhost_log_cache_sync(dev, vq);

	vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
}

static __rte_always_inline void
vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
				uint16_t buf_id,
				uint16_t count)
{
	uint16_t flags;

	flags = vq->desc_packed[vq->last_used_idx].flags;
	if (vq->used_wrap_counter) {
		flags |= VRING_DESC_F_USED;
		flags |= VRING_DESC_F_AVAIL;
	} else {
		flags &= ~VRING_DESC_F_USED;
		flags &= ~VRING_DESC_F_AVAIL;
	}

	if (!vq->shadow_used_idx) {
		vq->shadow_last_used_idx = vq->last_used_idx;

		vq->shadow_used_packed[0].id = buf_id;
		vq->shadow_used_packed[0].len = 0;
		vq->shadow_used_packed[0].flags = flags;
		vq->shadow_used_idx++;
	} else {
		vq->desc_packed[vq->last_used_idx].id = buf_id;
		vq->desc_packed[vq->last_used_idx].len = 0;
		vq->desc_packed[vq->last_used_idx].flags = flags;
	}

	vq_inc_last_used_packed(vq, count);
}

static __rte_always_inline void
vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,
					uint16_t buf_id,
					uint16_t count)
{
	uint16_t flags;

	vq->shadow_used_packed[0].id = buf_id;

	flags = vq->desc_packed[vq->last_used_idx].flags;
	if (vq->used_wrap_counter) {
		flags |= VRING_DESC_F_USED;
		flags |= VRING_DESC_F_AVAIL;
	} else {
		flags &= ~VRING_DESC_F_USED;
		flags &= ~VRING_DESC_F_AVAIL;
	}

	if (!vq->shadow_used_idx) {
		vq->shadow_last_used_idx = vq->last_used_idx;
		vq->shadow_used_packed[0].len = 0;
		vq->shadow_used_packed[0].flags = flags;
		vq->shadow_used_idx++;
	}

	vq_inc_last_used_packed(vq, count);
}

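/*
 * Enqueue-path shadow accumulation for the packed ring. Completed buffers
 * are staged in shadow_used_packed; vhost_shadow_enqueue_single_packed()
 * triggers the deferred data copies and a shadow flush once a full batch
 * worth of descriptors has been accumulated.
 */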
static __rte_always_inline void
vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
			uint32_t *len,
			uint16_t *id,
			uint16_t *count,
			uint16_t num_buffers)
{
	uint16_t i;

	for (i = 0; i < num_buffers; i++) {
		/* enqueue shadow flush action aligned with batch num */
		if (!vq->shadow_used_idx)
			vq->shadow_aligned_idx = vq->last_used_idx &
				PACKED_BATCH_MASK;
		vq->shadow_used_packed[vq->shadow_used_idx].id = id[i];
		vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
		vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
		vq->shadow_aligned_idx += count[i];
		vq->shadow_used_idx++;
	}
}

static __rte_always_inline void
vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
				struct vhost_virtqueue *vq,
				uint32_t *len,
				uint16_t *id,
				uint16_t *count,
				uint16_t num_buffers)
{
	vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);

	if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	}
}

/* avoid write operation when necessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	if ((var) != (val))			\
		(var) = (val);			\
} while (0)

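/*
 * Translate mbuf offload flags (L4 checksum, TSO/UFO) into the virtio-net
 * header that is prepended to each packet handed to the guest. When TX IP
 * checksum offload is requested, the IPv4 header checksum is computed here
 * since it cannot be deferred to the guest.
 */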
static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
	uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK;

	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;

	if (csum_l4) {
		net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;

		switch (csum_l4) {
		case RTE_MBUF_F_TX_TCP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
						cksum));
			break;
		case RTE_MBUF_F_TX_UDP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
						dgram_cksum));
			break;
		case RTE_MBUF_F_TX_SCTP_CKSUM:
			net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
						cksum));
			break;
		}
	} else {
		ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
	}

	/* IP cksum verification cannot be bypassed, then calculate here */
	if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
		struct rte_ipv4_hdr *ipv4_hdr;

		ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
						m_buf->l2_len);
		ipv4_hdr->hdr_checksum = 0;
		ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
	}

	if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
		if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4)
			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else
			net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		net_hdr->gso_size = m_buf->tso_segsz;
		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
			+ m_buf->l4_len;
	} else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
		net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		net_hdr->gso_size = m_buf->tso_segsz;
		net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
			m_buf->l4_len;
	} else {
		ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
		ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
	}
}

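/*
 * Map one guest descriptor range into host virtual address space, splitting
 * it into multiple buf_vector entries when the range is not contiguous in
 * the host mapping.
 */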
static __rte_always_inline int
map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct buf_vector *buf_vec, uint16_t *vec_idx,
		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
{
	uint16_t vec_id = *vec_idx;

	while (desc_len) {
		uint64_t desc_addr;
		uint64_t desc_chunck_len = desc_len;

		if (unlikely(vec_id >= BUF_VECTOR_MAX))
			return -1;

		desc_addr = vhost_iova_to_vva(dev, vq,
				desc_iova,
				&desc_chunck_len,
				perm);
		if (unlikely(!desc_addr))
			return -1;

		rte_prefetch0((void *)(uintptr_t)desc_addr);

		buf_vec[vec_id].buf_iova = desc_iova;
		buf_vec[vec_id].buf_addr = desc_addr;
		buf_vec[vec_id].buf_len = desc_chunck_len;

		desc_len -= desc_chunck_len;
		desc_iova += desc_chunck_len;
		vec_id++;
	}
	*vec_idx = vec_id;

	return 0;
}

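/*
 * Walk a split-ring descriptor chain starting at avail ring slot avail_idx,
 * following an indirect table if needed, and collect the host mappings of
 * every descriptor into buf_vec. Returns the head index and total length of
 * the chain through the output parameters.
 */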
static __rte_always_inline int
fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
			uint32_t avail_idx, uint16_t *vec_idx,
			struct buf_vector *buf_vec, uint16_t *desc_chain_head,
			uint32_t *desc_chain_len, uint8_t perm)
{
	uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
	uint16_t vec_id = *vec_idx;
	uint32_t len = 0;
	uint64_t dlen;
	uint32_t nr_descs = vq->size;
	uint32_t cnt = 0;
	struct vring_desc *descs = vq->desc;
	struct vring_desc *idesc = NULL;

	if (unlikely(idx >= vq->size))
		return -1;

	*desc_chain_head = idx;

	if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
		dlen = vq->desc[idx].len;
		nr_descs = dlen / sizeof(struct vring_desc);
		if (unlikely(nr_descs > vq->size))
			return -1;

		descs = (struct vring_desc *)(uintptr_t)
			vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
						&dlen,
						VHOST_ACCESS_RO);
		if (unlikely(!descs))
			return -1;

		if (unlikely(dlen < vq->desc[idx].len)) {
			/*
			 * The indirect desc table is not contiguous
			 * in process VA space, we have to copy it.
			 */
			idesc = vhost_alloc_copy_ind_table(dev, vq,
					vq->desc[idx].addr, vq->desc[idx].len);
			if (unlikely(!idesc))
				return -1;

			descs = idesc;
		}

		idx = 0;
	}

	while (1) {
		if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
			free_ind_table(idesc);
			return -1;
		}

		dlen = descs[idx].len;
		len += dlen;

		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
						descs[idx].addr, dlen,
						perm))) {
			free_ind_table(idesc);
			return -1;
		}

		if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
			break;

		idx = descs[idx].next;
	}

	*desc_chain_len = len;
	*vec_idx = vec_id;

	if (unlikely(!!idesc))
		free_ind_table(idesc);

	return 0;
}

/*
 * Returns -1 on fail, 0 on success
 */
static inline int
reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint32_t size, struct buf_vector *buf_vec,
				uint16_t *num_buffers, uint16_t avail_head,
				uint16_t *nr_vec)
{
	uint16_t cur_idx;
	uint16_t vec_idx = 0;
	uint16_t max_tries, tries = 0;

	uint16_t head_idx = 0;
	uint32_t len = 0;

	*num_buffers = 0;
	cur_idx = vq->last_avail_idx;

	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;
	else
		max_tries = 1;

	while (size > 0) {
		if (unlikely(cur_idx == avail_head))
			return -1;
		/*
		 * if we tried all available ring items, and still
		 * can't get enough buf, it means something abnormal
		 * happened.
		 */
		if (unlikely(++tries > max_tries))
			return -1;

		if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
						&vec_idx, buf_vec,
						&head_idx, &len,
						VHOST_ACCESS_RW) < 0))
			return -1;
		len = RTE_MIN(len, size);
		update_shadow_used_ring_split(vq, head_idx, len);
		size -= len;

		cur_idx++;
		*num_buffers += 1;
	}

	*nr_vec = vec_idx;

	return 0;
}

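/*
 * Packed-ring equivalents of the descriptor walk above: gather the host
 * mappings of one descriptor chain (direct or indirect) starting at
 * avail_idx, returning the buffer id and total chain length.
 */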
static __rte_always_inline int
fill_vec_buf_packed_indirect(struct virtio_net *dev,
			struct vhost_virtqueue *vq,
			struct vring_packed_desc *desc, uint16_t *vec_idx,
			struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
{
	uint16_t i;
	uint32_t nr_descs;
	uint16_t vec_id = *vec_idx;
	uint64_t dlen;
	struct vring_packed_desc *descs, *idescs = NULL;

	dlen = desc->len;
	descs = (struct vring_packed_desc *)(uintptr_t)
		vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
	if (unlikely(!descs))
		return -1;

	if (unlikely(dlen < desc->len)) {
		/*
		 * The indirect desc table is not contiguous
		 * in process VA space, we have to copy it.
		 */
		idescs = vhost_alloc_copy_ind_table(dev,
				vq, desc->addr, desc->len);
		if (unlikely(!idescs))
			return -1;

		descs = idescs;
	}

	nr_descs = desc->len / sizeof(struct vring_packed_desc);
	if (unlikely(nr_descs >= vq->size)) {
		free_ind_table(idescs);
		return -1;
	}

	for (i = 0; i < nr_descs; i++) {
		if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
			free_ind_table(idescs);
			return -1;
		}

		dlen = descs[i].len;
		*len += dlen;
		if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
						descs[i].addr, dlen,
						perm)))
			return -1;
	}
	*vec_idx = vec_id;

	if (unlikely(!!idescs))
		free_ind_table(idescs);

	return 0;
}

static __rte_always_inline int
fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
			uint16_t avail_idx, uint16_t *desc_count,
			struct buf_vector *buf_vec, uint16_t *vec_idx,
			uint16_t *buf_id, uint32_t *len, uint8_t perm)
{
	bool wrap_counter = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t vec_id = *vec_idx;
	uint64_t dlen;

	if (avail_idx < vq->last_avail_idx)
		wrap_counter ^= 1;

	/*
	 * Perform a load-acquire barrier in desc_is_avail to
	 * enforce the ordering between desc flags and desc
	 * content.
	 */
	if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
		return -1;

	*desc_count = 0;
	*len = 0;

	while (1) {
		if (unlikely(vec_id >= BUF_VECTOR_MAX))
			return -1;

		if (unlikely(*desc_count >= vq->size))
			return -1;

		*desc_count += 1;
		*buf_id = descs[avail_idx].id;

		if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
			if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
							&descs[avail_idx],
							&vec_id, buf_vec,
							len, perm) < 0))
				return -1;
		} else {
			dlen = descs[avail_idx].len;
			*len += dlen;

			if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
							descs[avail_idx].addr,
							dlen,
							perm)))
				return -1;
		}

		if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
			break;

		if (++avail_idx >= vq->size) {
			avail_idx -= vq->size;
			wrap_counter ^= 1;
		}
	}

	*vec_idx = vec_id;

	return 0;
}

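/*
 * Slow path for writing the virtio-net header when the first descriptor is
 * too small to hold it: copy the header piecewise across the following
 * buf_vec entries, logging each written chunk.
 */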
static __rte_noinline void
copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct buf_vector *buf_vec,
		struct virtio_net_hdr_mrg_rxbuf *hdr)
{
	uint64_t len;
	uint64_t remain = dev->vhost_hlen;
	uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
	uint64_t iova = buf_vec->buf_iova;

	while (remain) {
		len = RTE_MIN(remain,
				buf_vec->buf_len);
		dst = buf_vec->buf_addr;
		rte_memcpy((void *)(uintptr_t)dst,
				(void *)(uintptr_t)src,
				len);

		PRINT_PACKET(dev, (uintptr_t)dst,
				(uint32_t)len, 0);
		vhost_log_cache_write_iova(dev, vq,
				iova, len);

		remain -= len;
		iova += len;
		src += len;
		buf_vec++;
	}
}

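/*
 * Synchronous enqueue: copy one mbuf chain into the guest buffers described
 * by buf_vec, filling in the virtio-net header first. Small copies (up to
 * MAX_BATCH_LEN bytes) are deferred through the batch_copy array, when it
 * has room, and performed later by do_data_copy_enqueue().
 */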
static __rte_always_inline int
copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
			struct rte_mbuf *m, struct buf_vector *buf_vec,
			uint16_t nr_vec, uint16_t num_buffers)
{
	uint32_t vec_idx = 0;
	uint32_t mbuf_offset, mbuf_avail;
	uint32_t buf_offset, buf_avail;
	uint64_t buf_addr, buf_iova, buf_len;
	uint32_t cpy_len;
	uint64_t hdr_addr;
	struct rte_mbuf *hdr_mbuf;
	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
	int error = 0;

	if (unlikely(m == NULL)) {
		error = -1;
		goto out;
	}

	buf_addr = buf_vec[vec_idx].buf_addr;
	buf_iova = buf_vec[vec_idx].buf_iova;
	buf_len = buf_vec[vec_idx].buf_len;

	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
		error = -1;
		goto out;
	}

	hdr_mbuf = m;
	hdr_addr = buf_addr;
	if (unlikely(buf_len < dev->vhost_hlen)) {
		memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
		hdr = &tmp_hdr;
	} else
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;

	VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
		dev->vid, num_buffers);

	if (unlikely(buf_len < dev->vhost_hlen)) {
		buf_offset = dev->vhost_hlen - buf_len;
		vec_idx++;
		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_iova = buf_vec[vec_idx].buf_iova;
		buf_len = buf_vec[vec_idx].buf_len;
		buf_avail = buf_len - buf_offset;
	} else {
		buf_offset = dev->vhost_hlen;
		buf_avail = buf_len - dev->vhost_hlen;
	}

	mbuf_avail = rte_pktmbuf_data_len(m);
	mbuf_offset = 0;
	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current buf, get the next one */
		if (buf_avail == 0) {
			vec_idx++;
			if (unlikely(vec_idx >= nr_vec)) {
				error = -1;
				goto out;
			}

			buf_addr = buf_vec[vec_idx].buf_addr;
			buf_iova = buf_vec[vec_idx].buf_iova;
			buf_len = buf_vec[vec_idx].buf_len;

			buf_offset = 0;
			buf_avail = buf_len;
		}

		/* done with current mbuf, get the next one */
		if (mbuf_avail == 0) {
			m = m->next;

			mbuf_offset = 0;
			mbuf_avail = rte_pktmbuf_data_len(m);
		}

		if (hdr_addr) {
			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
			if (rxvq_is_mergeable(dev))
				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
						num_buffers);

			if (unlikely(hdr == &tmp_hdr)) {
				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
			} else {
				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
						dev->vhost_hlen, 0);
				vhost_log_cache_write_iova(dev, vq,
						buf_vec[0].buf_iova,
						dev->vhost_hlen);
			}

			hdr_addr = 0;
		}

		cpy_len = RTE_MIN(buf_avail, mbuf_avail);

		if (likely(cpy_len > MAX_BATCH_LEN ||
					vq->batch_copy_nb_elems >= vq->size)) {
			rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
				cpy_len);
			vhost_log_cache_write_iova(dev, vq,
						buf_iova + buf_offset,
						cpy_len);
			PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
				cpy_len, 0);
		} else {
			batch_copy[vq->batch_copy_nb_elems].dst =
				(void *)((uintptr_t)(buf_addr + buf_offset));
			batch_copy[vq->batch_copy_nb_elems].src =
				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
			batch_copy[vq->batch_copy_nb_elems].log_addr =
				buf_iova + buf_offset;
			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
			vq->batch_copy_nb_elems++;
		}

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		buf_avail -= cpy_len;
		buf_offset += cpy_len;
	}

out:

	return error;
}

static __rte_always_inline void
|
|
|
|
async_fill_vec(struct iovec *v, void *base, size_t len)
|
|
|
|
{
|
|
|
|
v->iov_base = base;
|
|
|
|
v->iov_len = len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static __rte_always_inline void
|
|
|
|
async_fill_iter(struct rte_vhost_iov_iter *it, size_t count,
|
|
|
|
struct iovec *vec, unsigned long nr_seg)
|
|
|
|
{
|
|
|
|
it->offset = 0;
|
|
|
|
it->count = count;
|
|
|
|
|
|
|
|
if (count) {
|
|
|
|
it->iov = vec;
|
|
|
|
it->nr_segs = nr_seg;
|
|
|
|
} else {
|
|
|
|
it->iov = 0;
|
|
|
|
it->nr_segs = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static __rte_always_inline void
|
|
|
|
async_fill_desc(struct rte_vhost_async_desc *desc,
|
|
|
|
struct rte_vhost_iov_iter *src, struct rte_vhost_iov_iter *dst)
|
|
|
|
{
|
|
|
|
desc->src = src;
|
|
|
|
desc->dst = dst;
|
|
|
|
}
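
/*
 * Async counterpart of copy_mbuf_to_desc(): rather than copying the payload
 * inline, the function below fills source/destination iovecs (guest buffers
 * resolved to host physical addresses with gpa_to_first_hpa()) so the copies
 * can be offloaded to a DMA device; only the virtio-net header is still
 * handled synchronously.
 */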

static __rte_always_inline int
async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
		struct rte_mbuf *m, struct buf_vector *buf_vec,
		uint16_t nr_vec, uint16_t num_buffers,
		struct iovec *src_iovec, struct iovec *dst_iovec,
		struct rte_vhost_iov_iter *src_it,
		struct rte_vhost_iov_iter *dst_it)
{
	struct rte_mbuf *hdr_mbuf;
	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
	uint64_t buf_addr, buf_iova;
	uint64_t hdr_addr;
	uint64_t mapped_len;
	uint32_t vec_idx = 0;
	uint32_t mbuf_offset, mbuf_avail;
	uint32_t buf_offset, buf_avail;
	uint32_t cpy_len, buf_len;
	int error = 0;

	uint32_t tlen = 0;
	int tvec_idx = 0;
	void *hpa;

	if (unlikely(m == NULL)) {
		error = -1;
		goto out;
	}

	buf_addr = buf_vec[vec_idx].buf_addr;
	buf_iova = buf_vec[vec_idx].buf_iova;
	buf_len = buf_vec[vec_idx].buf_len;

	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
		error = -1;
		goto out;
	}

	hdr_mbuf = m;
	hdr_addr = buf_addr;
	if (unlikely(buf_len < dev->vhost_hlen)) {
		memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
		hdr = &tmp_hdr;
	} else
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;

	VHOST_LOG_DATA(DEBUG, "(%d) RX: num merge buffers %d\n",
		dev->vid, num_buffers);

	if (unlikely(buf_len < dev->vhost_hlen)) {
		buf_offset = dev->vhost_hlen - buf_len;
		vec_idx++;
		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_iova = buf_vec[vec_idx].buf_iova;
		buf_len = buf_vec[vec_idx].buf_len;
		buf_avail = buf_len - buf_offset;
	} else {
		buf_offset = dev->vhost_hlen;
		buf_avail = buf_len - dev->vhost_hlen;
	}

	mbuf_avail = rte_pktmbuf_data_len(m);
	mbuf_offset = 0;

	while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current buf, get the next one */
		if (buf_avail == 0) {
			vec_idx++;
			if (unlikely(vec_idx >= nr_vec)) {
				error = -1;
				goto out;
			}

			buf_addr = buf_vec[vec_idx].buf_addr;
			buf_iova = buf_vec[vec_idx].buf_iova;
			buf_len = buf_vec[vec_idx].buf_len;

			buf_offset = 0;
			buf_avail = buf_len;
		}

		/* done with current mbuf, get the next one */
		if (mbuf_avail == 0) {
			m = m->next;

			mbuf_offset = 0;
			mbuf_avail = rte_pktmbuf_data_len(m);
		}

		if (hdr_addr) {
			virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
			if (rxvq_is_mergeable(dev))
				ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
						num_buffers);

			if (unlikely(hdr == &tmp_hdr)) {
				copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
			} else {
				PRINT_PACKET(dev, (uintptr_t)hdr_addr,
						dev->vhost_hlen, 0);
				vhost_log_cache_write_iova(dev, vq,
						buf_vec[0].buf_iova,
						dev->vhost_hlen);
			}

			hdr_addr = 0;
		}

		cpy_len = RTE_MIN(buf_avail, mbuf_avail);

		while (unlikely(cpy_len)) {
			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
					buf_iova + buf_offset,
					cpy_len, &mapped_len);
			if (unlikely(!hpa)) {
				VHOST_LOG_DATA(ERR, "(%d) %s: failed to get hpa.\n",
					dev->vid, __func__);
				error = -1;
				goto out;
			}

			async_fill_vec(src_iovec + tvec_idx,
				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
				mbuf_offset), (size_t)mapped_len);
			async_fill_vec(dst_iovec + tvec_idx,
					hpa, (size_t)mapped_len);

			tlen += (uint32_t)mapped_len;
			cpy_len -= (uint32_t)mapped_len;
			mbuf_avail -= (uint32_t)mapped_len;
			mbuf_offset += (uint32_t)mapped_len;
			buf_avail -= (uint32_t)mapped_len;
			buf_offset += (uint32_t)mapped_len;
			tvec_idx++;
		}
	}

	async_fill_iter(src_it, tlen, src_iovec, tvec_idx);
	async_fill_iter(dst_it, tlen, dst_iovec, tvec_idx);
out:
	return error;
}
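
/*
 * Reserve enough descriptors from the packed ring for one mbuf chain, copy
 * it synchronously and record the consumed buffers in the shadow used ring.
 */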

static __rte_always_inline int
vhost_enqueue_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt,
			    struct buf_vector *buf_vec,
			    uint16_t *nr_descs)
{
	uint16_t nr_vec = 0;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t max_tries, tries = 0;
	uint16_t buf_id = 0;
	uint32_t len = 0;
	uint16_t desc_count;
	uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint16_t num_buffers = 0;
	uint32_t buffer_len[vq->size];
	uint16_t buffer_buf_id[vq->size];
	uint16_t buffer_desc_count[vq->size];

	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;
	else
		max_tries = 1;

	while (size > 0) {
		/*
		 * if we tried all available ring items, and still
		 * can't get enough buf, it means something abnormal
		 * happened.
		 */
		if (unlikely(++tries > max_tries))
			return -1;

		if (unlikely(fill_vec_buf_packed(dev, vq,
						avail_idx, &desc_count,
						buf_vec, &nr_vec,
						&buf_id, &len,
						VHOST_ACCESS_RW) < 0))
			return -1;

		len = RTE_MIN(len, size);
		size -= len;

		buffer_len[num_buffers] = len;
		buffer_buf_id[num_buffers] = buf_id;
		buffer_desc_count[num_buffers] = desc_count;
		num_buffers += 1;

		*nr_descs += desc_count;
		avail_idx += desc_count;
		if (avail_idx >= vq->size)
			avail_idx -= vq->size;
	}

	if (copy_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers) < 0)
		return -1;

	vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
					   buffer_desc_count, num_buffers);

	return 0;
}
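
/*
 * Enqueue burst for the split virtqueue layout: reserve descriptors per
 * packet, copy the payloads, then flush the shadow used ring and notify the
 * guest once for the whole burst.
 */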

static __rte_noinline uint32_t
virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
	struct rte_mbuf **pkts, uint32_t count)
{
	uint32_t pkt_idx = 0;
	uint16_t num_buffers;
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint16_t avail_head;

	/*
	 * The ordering between avail index and
	 * desc reads needs to be enforced.
	 */
	avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);

	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);

	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
		uint16_t nr_vec = 0;

		if (unlikely(reserve_avail_buf_split(dev, vq,
						pkt_len, buf_vec, &num_buffers,
						avail_head, &nr_vec) < 0)) {
			VHOST_LOG_DATA(DEBUG,
				"(%d) failed to get enough desc from vring\n",
				dev->vid);
			vq->shadow_used_idx -= num_buffers;
			break;
		}

		VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx,
			vq->last_avail_idx + num_buffers);

		if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
						buf_vec, nr_vec,
						num_buffers) < 0) {
			vq->shadow_used_idx -= num_buffers;
			break;
		}

		vq->last_avail_idx += num_buffers;
	}

	do_data_copy_enqueue(dev, vq);

	if (likely(vq->shadow_used_idx)) {
		flush_shadow_used_ring_split(dev, vq);
		vhost_vring_call_split(dev, vq);
	}

	return pkt_idx;
}
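
/*
 * Fast path for the packed ring: the next PACKED_BATCH_SIZE descriptors are
 * first validated as a group (single-segment mbufs, available descriptors,
 * large enough buffers, successful IOVA translation) and only then copied by
 * the batched routine that follows the check.
 */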

static __rte_always_inline int
virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   uint64_t *desc_addrs,
			   uint64_t *lens)
{
	bool wrap_counter = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t avail_idx = vq->last_avail_idx;
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint16_t i;

	if (unlikely(avail_idx & PACKED_BATCH_MASK))
		return -1;

	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
		return -1;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->next != NULL))
			return -1;
		if (unlikely(!desc_is_avail(&descs[avail_idx + i],
					    wrap_counter)))
			return -1;
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		lens[i] = descs[avail_idx + i].len;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
			return -1;
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
						  descs[avail_idx + i].addr,
						  &lens[i],
						  VHOST_ACCESS_RW);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(!desc_addrs[i]))
			return -1;
		if (unlikely(lens[i] != descs[avail_idx + i].len))
			return -1;
	}

	return 0;
}

static __rte_always_inline void
virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   uint64_t *desc_addrs,
			   uint64_t *lens)
{
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
	struct vring_packed_desc *descs = vq->desc_packed;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t ids[PACKED_BATCH_SIZE];
	uint16_t i;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
		hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
					(uintptr_t)desc_addrs[i];
		lens[i] = pkts[i]->pkt_len +
			sizeof(struct virtio_net_hdr_mrg_rxbuf);
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);

	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),
			   rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
			   pkts[i]->pkt_len);
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr,
					   lens[i]);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		ids[i] = descs[avail_idx + i].id;

	vhost_flush_enqueue_batch_packed(dev, vq, lens, ids);
}

static __rte_always_inline int
virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts)
{
	uint64_t desc_addrs[PACKED_BATCH_SIZE];
	uint64_t lens[PACKED_BATCH_SIZE];

	if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
		return -1;

	if (vq->shadow_used_idx) {
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	}

	virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);

	return 0;
}

static __rte_always_inline int16_t
virtio_dev_rx_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt)
{
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint16_t nr_descs = 0;

	if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
						 &nr_descs) < 0)) {
		VHOST_LOG_DATA(DEBUG,
				"(%d) failed to get enough desc from vring\n",
				dev->vid);
		return -1;
	}

	VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx,
			vq->last_avail_idx + nr_descs);

	vq_inc_last_avail_packed(vq, nr_descs);

	return 0;
}

static __rte_noinline uint32_t
virtio_dev_rx_packed(struct virtio_net *dev,
		     struct vhost_virtqueue *__rte_restrict vq,
		     struct rte_mbuf **__rte_restrict pkts,
		     uint32_t count)
{
	uint32_t pkt_idx = 0;

	do {
		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);

		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
			if (!virtio_dev_rx_sync_batch_packed(dev, vq,
							&pkts[pkt_idx])) {
				pkt_idx += PACKED_BATCH_SIZE;
				continue;
			}
		}

		if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx]))
			break;
		pkt_idx++;

	} while (pkt_idx < count);

	if (vq->shadow_used_idx) {
		do_data_copy_enqueue(dev, vq);
		vhost_flush_enqueue_shadow_packed(dev, vq);
	}

	if (pkt_idx)
		vhost_vring_call_packed(dev, vq);

	return pkt_idx;
}
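
/*
 * Common enqueue wrapper: validates the queue index, takes the virtqueue
 * access lock and the IOTLB read lock when needed, then dispatches to the
 * packed or split implementation.
 */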

static __rte_always_inline uint32_t
virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	uint32_t nb_tx = 0;

	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	rte_spinlock_lock(&vq->access_lock);

	if (unlikely(!vq->enabled))
		goto out_access_unlock;

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0))
			goto out;

	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
	if (count == 0)
		goto out;

	if (vq_is_packed(dev))
		nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
	else
		nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	return nb_tx;
}
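
/*
 * rte_vhost_enqueue_burst() below is the public synchronous enqueue entry
 * point. A minimal usage sketch, with hypothetical vid/queue names and error
 * handling omitted (VIRTIO_RXQ, port_id and the burst size are assumptions
 * made for the sketch, not part of this file):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t i, nb, sent;
 *
 *	nb = rte_eth_rx_burst(port_id, 0, pkts, 32);
 *	sent = rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb);
 *	for (i = 0; i < nb; i++)
 *		rte_pktmbuf_free(pkts[i]);	// sync path copies data, caller keeps mbufs
 *	(void)sent;
 */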

uint16_t
rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
	struct rte_mbuf **__rte_restrict pkts, uint16_t count)
{
	struct virtio_net *dev = get_device(vid);

	if (!dev)
		return 0;

	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
		VHOST_LOG_DATA(ERR,
			"(%d) %s: built-in vhost net backend is disabled.\n",
			dev->vid, __func__);
		return 0;
	}

	return virtio_dev_rx(dev, queue_id, pkts, count);
}

static __rte_always_inline uint16_t
virtio_dev_rx_async_get_info_idx(uint16_t pkts_idx,
	uint16_t vq_size, uint16_t n_inflight)
{
	return pkts_idx > n_inflight ? (pkts_idx - n_inflight) :
		(vq_size - n_inflight + pkts_idx) % vq_size;
}

static __rte_always_inline void
store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring,
		uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
{
	size_t elem_size = sizeof(struct vring_used_elem);

	if (d_idx + count <= ring_size) {
		rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
	} else {
		uint16_t size = ring_size - d_idx;

		rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
		rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
	}
}

static __rte_always_inline void
store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
		struct vring_used_elem_packed *d_ring,
		uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
{
	size_t elem_size = sizeof(struct vring_used_elem_packed);

	if (d_idx + count <= ring_size) {
		rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
	} else {
		uint16_t size = ring_size - d_idx;

		rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
		rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
	}
}
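
/*
 * Asynchronous enqueue for the split ring: descriptors are reserved as in the
 * sync path, but the payload copies are described as iovec pairs and handed to
 * the registered async channel via transfer_data(). Failed transfers are only
 * accounted for here; the actual error handling happens when the application
 * polls for completions.
 */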

static __rte_noinline uint32_t
virtio_dev_rx_async_submit_split(struct virtio_net *dev,
	struct vhost_virtqueue *vq, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint32_t pkt_idx = 0, pkt_burst_idx = 0;
	uint16_t num_buffers;
	uint16_t avail_head;

	struct rte_vhost_iov_iter *it_pool = vq->it_pool;
	struct iovec *vec_pool = vq->vec_pool;
	struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
	struct iovec *src_iovec = vec_pool;
	struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
	struct async_inflight_info *pkts_info = vq->async_pkts_info;
	uint32_t n_pkts = 0, pkt_err = 0;
	int32_t n_xfer;
	uint16_t segs_await = 0;
	uint16_t iovec_idx = 0, it_idx = 0, slot_idx = 0;

	/*
	 * The ordering between avail index and desc reads need to be enforced.
	 */
	avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);

	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);

	for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
		uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
		uint16_t nr_vec = 0;

		if (unlikely(reserve_avail_buf_split(dev, vq,
						pkt_len, buf_vec, &num_buffers,
						avail_head, &nr_vec) < 0)) {
			VHOST_LOG_DATA(DEBUG,
				"(%d) failed to get enough desc from vring\n",
				dev->vid);
			vq->shadow_used_idx -= num_buffers;
			break;
		}

		VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx,
			vq->last_avail_idx + num_buffers);

		if (async_mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers,
				&src_iovec[iovec_idx], &dst_iovec[iovec_idx],
				&it_pool[it_idx], &it_pool[it_idx + 1]) < 0) {
			vq->shadow_used_idx -= num_buffers;
			break;
		}

		async_fill_desc(&tdes[pkt_burst_idx++], &it_pool[it_idx],
				&it_pool[it_idx + 1]);

		slot_idx = (vq->async_pkts_idx + pkt_idx) & (vq->size - 1);
		pkts_info[slot_idx].descs = num_buffers;
		pkts_info[slot_idx].mbuf = pkts[pkt_idx];

		iovec_idx += it_pool[it_idx].nr_segs;
		segs_await += it_pool[it_idx].nr_segs;
		it_idx += 2;

		vq->last_avail_idx += num_buffers;

		/*
		 * conditions to trigger async device transfer:
		 * - buffered packet number reaches transfer threshold
		 * - unused async iov number is less than max vhost vector
		 */
		if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
			((VHOST_MAX_ASYNC_VEC >> 1) - segs_await <
			BUF_VECTOR_MAX))) {
			n_xfer = vq->async_ops.transfer_data(dev->vid,
					queue_id, tdes, 0, pkt_burst_idx);
			if (likely(n_xfer >= 0)) {
				n_pkts = n_xfer;
			} else {
				VHOST_LOG_DATA(ERR,
					"(%d) %s: failed to transfer data for queue id %d.\n",
					dev->vid, __func__, queue_id);
				n_pkts = 0;
			}

			iovec_idx = 0;
			it_idx = 0;
			segs_await = 0;

			if (unlikely(n_pkts < pkt_burst_idx)) {
				/*
				 * log error packets number here and do actual
				 * error processing when applications poll
				 * completion
				 */
				pkt_err = pkt_burst_idx - n_pkts;
				pkt_idx++;
				pkt_burst_idx = 0;
				break;
			}

			pkt_burst_idx = 0;
		}
	}

	if (pkt_burst_idx) {
		n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
		if (likely(n_xfer >= 0)) {
			n_pkts = n_xfer;
		} else {
			VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
				dev->vid, __func__, queue_id);
			n_pkts = 0;
		}

		if (unlikely(n_pkts < pkt_burst_idx))
			pkt_err = pkt_burst_idx - n_pkts;
	}

	if (unlikely(pkt_err)) {
		uint16_t num_descs = 0;

		/* update number of completed packets */
		pkt_idx -= pkt_err;

		/* calculate the sum of descriptors to revert */
		while (pkt_err-- > 0) {
			num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
			slot_idx--;
		}

		/* recover shadow used ring and available ring */
		vq->shadow_used_idx -= num_descs;
		vq->last_avail_idx -= num_descs;
	}

	/* keep used descriptors */
	if (likely(vq->shadow_used_idx)) {
		uint16_t to = vq->async_desc_idx_split & (vq->size - 1);

		store_dma_desc_info_split(vq->shadow_used_split,
				vq->async_descs_split, vq->size, 0, to,
				vq->shadow_used_idx);

		vq->async_desc_idx_split += vq->shadow_used_idx;
		vq->async_pkts_idx += pkt_idx;
		vq->async_pkts_inflight_n += pkt_idx;
		vq->shadow_used_idx = 0;
	}

	return pkt_idx;
}

static __rte_always_inline void
vhost_update_used_packed(struct vhost_virtqueue *vq,
			struct vring_used_elem_packed *shadow_ring,
			uint16_t count)
{
	int i;
	uint16_t used_idx = vq->last_used_idx;
	uint16_t head_idx = vq->last_used_idx;
	uint16_t head_flags = 0;

	if (count == 0)
		return;

	/* Split loop in two to save memory barriers */
	for (i = 0; i < count; i++) {
		vq->desc_packed[used_idx].id = shadow_ring[i].id;
		vq->desc_packed[used_idx].len = shadow_ring[i].len;

		used_idx += shadow_ring[i].count;
		if (used_idx >= vq->size)
			used_idx -= vq->size;
	}

	/* The ordering for storing desc flags needs to be enforced. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	for (i = 0; i < count; i++) {
		uint16_t flags;

		if (vq->shadow_used_packed[i].len)
			flags = VRING_DESC_F_WRITE;
		else
			flags = 0;

		if (vq->used_wrap_counter) {
			flags |= VRING_DESC_F_USED;
			flags |= VRING_DESC_F_AVAIL;
		} else {
			flags &= ~VRING_DESC_F_USED;
			flags &= ~VRING_DESC_F_AVAIL;
		}

		if (i > 0) {
			vq->desc_packed[vq->last_used_idx].flags = flags;
		} else {
			head_idx = vq->last_used_idx;
			head_flags = flags;
		}

		vq_inc_last_used_packed(vq, shadow_ring[i].count);
	}

	vq->desc_packed[head_idx].flags = head_flags;
}

static __rte_always_inline int
vhost_enqueue_async_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt,
			    struct buf_vector *buf_vec,
			    uint16_t *nr_descs,
			    uint16_t *nr_buffers,
			    struct iovec *src_iovec, struct iovec *dst_iovec,
			    struct rte_vhost_iov_iter *src_it,
			    struct rte_vhost_iov_iter *dst_it)
{
	uint16_t nr_vec = 0;
	uint16_t avail_idx = vq->last_avail_idx;
	uint16_t max_tries, tries = 0;
	uint16_t buf_id = 0;
	uint32_t len = 0;
	uint16_t desc_count = 0;
	uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint32_t buffer_len[vq->size];
	uint16_t buffer_buf_id[vq->size];
	uint16_t buffer_desc_count[vq->size];

	if (rxvq_is_mergeable(dev))
		max_tries = vq->size - 1;
	else
		max_tries = 1;

	while (size > 0) {
		/*
		 * if we tried all available ring items, and still
		 * can't get enough buf, it means something abnormal
		 * happened.
		 */
		if (unlikely(++tries > max_tries))
			return -1;

		if (unlikely(fill_vec_buf_packed(dev, vq, avail_idx, &desc_count, buf_vec, &nr_vec,
						&buf_id, &len, VHOST_ACCESS_RW) < 0))
			return -1;

		len = RTE_MIN(len, size);
		size -= len;

		buffer_len[*nr_buffers] = len;
		buffer_buf_id[*nr_buffers] = buf_id;
		buffer_desc_count[*nr_buffers] = desc_count;
		*nr_buffers += 1;
		*nr_descs += desc_count;
		avail_idx += desc_count;
		if (avail_idx >= vq->size)
			avail_idx -= vq->size;
	}

	if (unlikely(async_mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec,
					*nr_buffers, src_iovec, dst_iovec,
					src_it, dst_it) < 0))
		return -1;

	vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);

	return 0;
}

static __rte_always_inline int16_t
virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
			    struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers,
			    struct iovec *src_iovec, struct iovec *dst_iovec,
			    struct rte_vhost_iov_iter *src_it, struct rte_vhost_iov_iter *dst_it)
{
	struct buf_vector buf_vec[BUF_VECTOR_MAX];

	if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec, nr_descs, nr_buffers,
						src_iovec, dst_iovec,
						src_it, dst_it) < 0)) {
		VHOST_LOG_DATA(DEBUG, "(%d) failed to get enough desc from vring\n", dev->vid);
		return -1;
	}

	VHOST_LOG_DATA(DEBUG, "(%d) current index %d | end index %d\n",
			dev->vid, vq->last_avail_idx, vq->last_avail_idx + *nr_descs);

	return 0;
}

static __rte_always_inline void
dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx,
			uint32_t nr_err, uint32_t *pkt_idx)
{
	uint16_t descs_err = 0;
	uint16_t buffers_err = 0;
	struct async_inflight_info *pkts_info = vq->async_pkts_info;

	*pkt_idx -= nr_err;
	/* calculate the sum of buffers and descs of DMA-error packets. */
	while (nr_err-- > 0) {
		descs_err += pkts_info[slot_idx % vq->size].descs;
		buffers_err += pkts_info[slot_idx % vq->size].nr_buffers;
		slot_idx--;
	}

	if (vq->last_avail_idx >= descs_err) {
		vq->last_avail_idx -= descs_err;
	} else {
		vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
		vq->avail_wrap_counter ^= 1;
	}

	vq->shadow_used_idx -= buffers_err;
}
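
/*
 * Packed-ring variant of the asynchronous enqueue: same overall flow as the
 * split version, but per-packet descriptor and buffer counts are tracked so
 * that dma_error_handler_packed() can roll back the ring on transfer failures.
 */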

static __rte_noinline uint32_t
virtio_dev_rx_async_submit_packed(struct virtio_net *dev,
	struct vhost_virtqueue *vq, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	uint32_t pkt_idx = 0, pkt_burst_idx = 0;
	uint32_t remained = count;
	int32_t n_xfer;
	uint16_t num_buffers;
	uint16_t num_descs;

	struct rte_vhost_iov_iter *it_pool = vq->it_pool;
	struct iovec *vec_pool = vq->vec_pool;
	struct rte_vhost_async_desc tdes[MAX_PKT_BURST];
	struct iovec *src_iovec = vec_pool;
	struct iovec *dst_iovec = vec_pool + (VHOST_MAX_ASYNC_VEC >> 1);
	struct async_inflight_info *pkts_info = vq->async_pkts_info;
	uint32_t n_pkts = 0, pkt_err = 0;
	uint16_t slot_idx = 0;
	uint16_t segs_await = 0;
	uint16_t iovec_idx = 0, it_idx = 0;

	do {
		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);

		num_buffers = 0;
		num_descs = 0;
		if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx],
						&num_descs, &num_buffers,
						&src_iovec[iovec_idx], &dst_iovec[iovec_idx],
						&it_pool[it_idx], &it_pool[it_idx + 1]) < 0))
			break;

		slot_idx = (vq->async_pkts_idx + pkt_idx) % vq->size;

		async_fill_desc(&tdes[pkt_burst_idx++], &it_pool[it_idx],
				&it_pool[it_idx + 1]);
		pkts_info[slot_idx].descs = num_descs;
		pkts_info[slot_idx].nr_buffers = num_buffers;
		pkts_info[slot_idx].mbuf = pkts[pkt_idx];
		iovec_idx += it_pool[it_idx].nr_segs;
		segs_await += it_pool[it_idx].nr_segs;
		it_idx += 2;

		pkt_idx++;
		remained--;
		vq_inc_last_avail_packed(vq, num_descs);

		/*
		 * conditions to trigger async device transfer:
		 * - buffered packet number reaches transfer threshold
		 * - unused async iov number is less than max vhost vector
		 */
		if (unlikely(pkt_burst_idx >= VHOST_ASYNC_BATCH_THRESHOLD ||
			((VHOST_MAX_ASYNC_VEC >> 1) - segs_await < BUF_VECTOR_MAX))) {
			n_xfer = vq->async_ops.transfer_data(dev->vid,
					queue_id, tdes, 0, pkt_burst_idx);
			if (likely(n_xfer >= 0)) {
				n_pkts = n_xfer;
			} else {
				VHOST_LOG_DATA(ERR,
					"(%d) %s: failed to transfer data for queue id %d.\n",
					dev->vid, __func__, queue_id);
				n_pkts = 0;
			}

			iovec_idx = 0;
			it_idx = 0;
			segs_await = 0;

			if (unlikely(n_pkts < pkt_burst_idx)) {
				/*
				 * log error packets number here and do actual
				 * error processing when applications poll
				 * completion
				 */
				pkt_err = pkt_burst_idx - n_pkts;
				pkt_burst_idx = 0;
				break;
			}

			pkt_burst_idx = 0;
		}
	} while (pkt_idx < count);

	if (pkt_burst_idx) {
		n_xfer = vq->async_ops.transfer_data(dev->vid, queue_id, tdes, 0, pkt_burst_idx);
		if (likely(n_xfer >= 0)) {
			n_pkts = n_xfer;
		} else {
			VHOST_LOG_DATA(ERR, "(%d) %s: failed to transfer data for queue id %d.\n",
				dev->vid, __func__, queue_id);
			n_pkts = 0;
		}

		if (unlikely(n_pkts < pkt_burst_idx))
			pkt_err = pkt_burst_idx - n_pkts;
	}

	if (unlikely(pkt_err))
		dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);

	if (likely(vq->shadow_used_idx)) {
		/* keep used descriptors. */
		store_dma_desc_info_packed(vq->shadow_used_packed, vq->async_buffers_packed,
					vq->size, 0, vq->async_buffer_idx_packed,
					vq->shadow_used_idx);

		vq->async_buffer_idx_packed += vq->shadow_used_idx;
		if (vq->async_buffer_idx_packed >= vq->size)
			vq->async_buffer_idx_packed -= vq->size;

		vq->async_pkts_idx += pkt_idx;
		if (vq->async_pkts_idx >= vq->size)
			vq->async_pkts_idx -= vq->size;

		vq->shadow_used_idx = 0;
		vq->async_pkts_inflight_n += pkt_idx;
	}

	return pkt_idx;
}

static __rte_always_inline void
write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)
{
	uint16_t nr_left = n_descs;
	uint16_t nr_copy;
	uint16_t to, from;

	do {
		from = vq->last_async_desc_idx_split & (vq->size - 1);
		nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;
		to = vq->last_used_idx & (vq->size - 1);

		if (to + nr_copy <= vq->size) {
			rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
					nr_copy * sizeof(struct vring_used_elem));
		} else {
			uint16_t size = vq->size - to;

			rte_memcpy(&vq->used->ring[to], &vq->async_descs_split[from],
					size * sizeof(struct vring_used_elem));
			rte_memcpy(&vq->used->ring[0], &vq->async_descs_split[from + size],
					(nr_copy - size) * sizeof(struct vring_used_elem));
		}

		vq->last_async_desc_idx_split += nr_copy;
		vq->last_used_idx += nr_copy;
		nr_left -= nr_copy;
	} while (nr_left > 0);
}

static __rte_always_inline void
write_back_completed_descs_packed(struct vhost_virtqueue *vq,
				uint16_t n_buffers)
{
	uint16_t nr_left = n_buffers;
	uint16_t from, to;

	do {
		from = vq->last_async_buffer_idx_packed;
		to = (from + nr_left) % vq->size;
		if (to > from) {
			vhost_update_used_packed(vq, vq->async_buffers_packed + from, to - from);
			vq->last_async_buffer_idx_packed += nr_left;
			nr_left = 0;
		} else {
			vhost_update_used_packed(vq, vq->async_buffers_packed + from,
				vq->size - from);
			vq->last_async_buffer_idx_packed = 0;
			nr_left -= vq->size - from;
		}
	} while (nr_left > 0);
}
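
/*
 * Harvest completed async copies: query the async channel for finished
 * transfers, hand the corresponding mbufs back to the caller and write the
 * used descriptors back to the ring (or only advance the bookkeeping indexes
 * when the ring is not accessible at the moment).
 */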

static __rte_always_inline uint16_t
vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct vhost_virtqueue *vq;
	struct async_inflight_info *pkts_info;
	int32_t n_cpl;
	uint16_t n_pkts_cpl = 0, n_pkts_put = 0, n_descs = 0, n_buffers = 0;
	uint16_t start_idx, pkts_idx, vq_size;
	uint16_t from, i;

	vq = dev->virtqueue[queue_id];
	pkts_idx = vq->async_pkts_idx % vq->size;
	pkts_info = vq->async_pkts_info;
	vq_size = vq->size;
	start_idx = virtio_dev_rx_async_get_info_idx(pkts_idx,
		vq_size, vq->async_pkts_inflight_n);

	if (count > vq->async_last_pkts_n) {
		n_cpl = vq->async_ops.check_completed_copies(dev->vid,
			queue_id, 0, count - vq->async_last_pkts_n);
		if (likely(n_cpl >= 0)) {
			n_pkts_cpl = n_cpl;
		} else {
			VHOST_LOG_DATA(ERR,
				"(%d) %s: failed to check completed copies for queue id %d.\n",
				dev->vid, __func__, queue_id);
			n_pkts_cpl = 0;
		}
	}

	n_pkts_cpl += vq->async_last_pkts_n;
	n_pkts_put = RTE_MIN(n_pkts_cpl, count);
	if (unlikely(n_pkts_put == 0)) {
		vq->async_last_pkts_n = n_pkts_cpl;
		return 0;
	}

	if (vq_is_packed(dev)) {
		for (i = 0; i < n_pkts_put; i++) {
			from = (start_idx + i) % vq_size;
			n_buffers += pkts_info[from].nr_buffers;
			pkts[i] = pkts_info[from].mbuf;
		}
	} else {
		for (i = 0; i < n_pkts_put; i++) {
			from = (start_idx + i) & (vq_size - 1);
			n_descs += pkts_info[from].descs;
			pkts[i] = pkts_info[from].mbuf;
		}
	}
	vq->async_last_pkts_n = n_pkts_cpl - n_pkts_put;
	vq->async_pkts_inflight_n -= n_pkts_put;

	if (likely(vq->enabled && vq->access_ok)) {
		if (vq_is_packed(dev)) {
			write_back_completed_descs_packed(vq, n_buffers);

			vhost_vring_call_packed(dev, vq);
		} else {
			write_back_completed_descs_split(vq, n_descs);

			__atomic_add_fetch(&vq->used->idx, n_descs,
					__ATOMIC_RELEASE);
			vhost_vring_call_split(dev, vq);
		}
	} else {
		if (vq_is_packed(dev)) {
			vq->last_async_buffer_idx_packed += n_buffers;
			if (vq->last_async_buffer_idx_packed >= vq->size)
				vq->last_async_buffer_idx_packed -= vq->size;
		} else {
			vq->last_async_desc_idx_split += n_descs;
		}
	}

	return n_pkts_put;
}

uint16_t
rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtio_net *dev = get_device(vid);
	struct vhost_virtqueue *vq;
	uint16_t n_pkts_cpl = 0;

	if (unlikely(!dev))
		return 0;

	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(!vq->async_registered)) {
		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	rte_spinlock_lock(&vq->access_lock);

	n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);

	rte_spinlock_unlock(&vq->access_lock);

	return n_pkts_cpl;
}

uint16_t
rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtio_net *dev = get_device(vid);
	struct vhost_virtqueue *vq;
	uint16_t n_pkts_cpl = 0;

	if (!dev)
		return 0;

	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(!vq->async_registered)) {
		VHOST_LOG_DATA(ERR, "(%d) %s: async not registered for queue id %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count);

	return n_pkts_cpl;
}

static __rte_always_inline uint32_t
virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
	struct rte_mbuf **pkts, uint32_t count)
{
	struct vhost_virtqueue *vq;
	uint32_t nb_tx = 0;

	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);
	if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR, "(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	rte_spinlock_lock(&vq->access_lock);

	if (unlikely(!vq->enabled || !vq->async_registered))
		goto out_access_unlock;

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0))
			goto out;

	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
	if (count == 0)
		goto out;

	if (vq_is_packed(dev))
		nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id,
				pkts, count);
	else
		nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id,
				pkts, count);

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	return nb_tx;
}
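
/*
 * rte_vhost_submit_enqueue_burst() below is the public asynchronous enqueue
 * entry point; completions are harvested separately with
 * rte_vhost_poll_enqueue_completed(). A rough usage sketch, assuming an async
 * channel was already registered for the queue (VIRTIO_RXQ and the burst
 * sizes are placeholders chosen for the sketch):
 *
 *	n_enq = rte_vhost_submit_enqueue_burst(vid, VIRTIO_RXQ, pkts, nb);
 *	// ... later, typically from the same lcore ...
 *	n_cpl = rte_vhost_poll_enqueue_completed(vid, VIRTIO_RXQ, cpl, MAX_PKT_BURST);
 *	for (i = 0; i < n_cpl; i++)
 *		rte_pktmbuf_free(cpl[i]);	// completed mbufs are returned to the caller
 */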

uint16_t
rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count)
{
	struct virtio_net *dev = get_device(vid);

	if (!dev)
		return 0;

	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
		VHOST_LOG_DATA(ERR,
			"(%d) %s: built-in vhost net backend is disabled.\n",
			dev->vid, __func__);
		return 0;
	}

	return virtio_dev_rx_async_submit(dev, queue_id, pkts, count);
}

static inline bool
virtio_net_with_host_offload(struct virtio_net *dev)
{
	if (dev->features &
		((1ULL << VIRTIO_NET_F_CSUM) |
		 (1ULL << VIRTIO_NET_F_HOST_ECN) |
		 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
		 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
		 (1ULL << VIRTIO_NET_F_HOST_UFO)))
		return true;

	return false;
}
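
/*
 * Dequeue-side offload helpers: parse_headers() validates and measures the
 * L2/L3/L4 headers of a packet coming from the guest, and the
 * vhost_dequeue_offload*() functions below translate its virtio-net header
 * into mbuf offload flags.
 */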

static int
parse_headers(struct rte_mbuf *m, uint8_t *l4_proto)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_ether_hdr *eth_hdr;
	uint16_t ethertype;
	uint16_t data_len = rte_pktmbuf_data_len(m);

	if (data_len < sizeof(struct rte_ether_hdr))
		return -EINVAL;

	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	m->l2_len = sizeof(struct rte_ether_hdr);
	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);

	if (ethertype == RTE_ETHER_TYPE_VLAN) {
		if (data_len < sizeof(struct rte_ether_hdr) +
				sizeof(struct rte_vlan_hdr))
			goto error;

		struct rte_vlan_hdr *vlan_hdr =
			(struct rte_vlan_hdr *)(eth_hdr + 1);

		m->l2_len += sizeof(struct rte_vlan_hdr);
		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	}

	switch (ethertype) {
	case RTE_ETHER_TYPE_IPV4:
		if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr))
			goto error;
		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
				m->l2_len);
		m->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
		if (data_len < m->l2_len + m->l3_len)
			goto error;
		m->ol_flags |= RTE_MBUF_F_TX_IPV4;
		*l4_proto = ipv4_hdr->next_proto_id;
		break;
	case RTE_ETHER_TYPE_IPV6:
		if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr))
			goto error;
		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
				m->l2_len);
		m->l3_len = sizeof(struct rte_ipv6_hdr);
		m->ol_flags |= RTE_MBUF_F_TX_IPV6;
		*l4_proto = ipv6_hdr->proto;
		break;
	default:
		/* a valid L3 header is needed for further L4 parsing */
		goto error;
	}

	/* both CSUM and GSO need a valid L4 header */
	switch (*l4_proto) {
	case IPPROTO_TCP:
		if (data_len < m->l2_len + m->l3_len +
				sizeof(struct rte_tcp_hdr))
			goto error;
		break;
	case IPPROTO_UDP:
		if (data_len < m->l2_len + m->l3_len +
				sizeof(struct rte_udp_hdr))
			goto error;
		break;
	case IPPROTO_SCTP:
		if (data_len < m->l2_len + m->l3_len +
				sizeof(struct rte_sctp_hdr))
			goto error;
		break;
	default:
		goto error;
	}

	return 0;

error:
	m->l2_len = 0;
	m->l3_len = 0;
	m->ol_flags = 0;
	return -EINVAL;
}
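
/*
 * Legacy behaviour: map VIRTIO_NET_HDR_F_NEEDS_CSUM and the GSO type directly
 * to Tx offload flags (RTE_MBUF_F_TX_*), which is what older applications
 * built against the previous dequeue offload semantics expect.
 */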

static __rte_always_inline void
vhost_dequeue_offload_legacy(struct virtio_net_hdr *hdr, struct rte_mbuf *m)
{
	uint8_t l4_proto = 0;
	struct rte_tcp_hdr *tcp_hdr = NULL;
	uint16_t tcp_len;
	uint16_t data_len = rte_pktmbuf_data_len(m);

	if (parse_headers(m, &l4_proto) < 0)
		return;

	if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		if (hdr->csum_start == (m->l2_len + m->l3_len)) {
			switch (hdr->csum_offset) {
			case (offsetof(struct rte_tcp_hdr, cksum)):
				if (l4_proto != IPPROTO_TCP)
					goto error;
				m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
				break;
			case (offsetof(struct rte_udp_hdr, dgram_cksum)):
				if (l4_proto != IPPROTO_UDP)
					goto error;
				m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
				break;
			case (offsetof(struct rte_sctp_hdr, cksum)):
				if (l4_proto != IPPROTO_SCTP)
					goto error;
				m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM;
				break;
			default:
				goto error;
			}
		} else {
			goto error;
		}
	}

	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			if (l4_proto != IPPROTO_TCP)
				goto error;
			tcp_hdr = rte_pktmbuf_mtod_offset(m,
					struct rte_tcp_hdr *,
					m->l2_len + m->l3_len);
			tcp_len = (tcp_hdr->data_off & 0xf0) >> 2;
			if (data_len < m->l2_len + m->l3_len + tcp_len)
				goto error;
			m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
			m->tso_segsz = hdr->gso_size;
			m->l4_len = tcp_len;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			if (l4_proto != IPPROTO_UDP)
				goto error;
			m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG;
			m->tso_segsz = hdr->gso_size;
			m->l4_len = sizeof(struct rte_udp_hdr);
			break;
		default:
			VHOST_LOG_DATA(WARNING,
				"unsupported gso type %u.\n", hdr->gso_type);
			goto error;
		}
	}
	return;

error:
	m->l2_len = 0;
	m->l3_len = 0;
	m->ol_flags = 0;
}
|
|
|
|
|
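/*
 * Offload translation entry point for the dequeue path: either fall back
 * to the legacy Tx-flag behaviour above, or describe what the guest
 * requested with Rx flags (RTE_MBUF_F_RX_*), computing a software
 * checksum when the L4 protocol is not recognized.
 */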
static __rte_always_inline void
vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m,
	bool legacy_ol_flags)
{
	struct rte_net_hdr_lens hdr_lens;
	int l4_supported = 0;
	uint32_t ptype;

	if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
		return;

	if (legacy_ol_flags) {
		vhost_dequeue_offload_legacy(hdr, m);
		return;
	}

	m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	/* According to Virtio 1.1 spec, the device only needs to look at
	 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the packet transmission path.
	 * This differs from the incoming packet processing path, where the
	 * driver can rely on the VIRTIO_NET_HDR_F_DATA_VALID flag set by
	 * the device.
	 *
	 * 5.1.6.2.1 Driver Requirements: Packet Transmission
	 * The driver MUST NOT set the VIRTIO_NET_HDR_F_DATA_VALID and
	 * VIRTIO_NET_HDR_F_RSC_INFO bits in flags.
	 *
	 * 5.1.6.2.2 Device Requirements: Packet Transmission
	 * The device MUST ignore flag bits that it does not recognize.
	 */
	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		uint32_t hdrlen;

		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported != 0) {
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			if (rte_raw_cksum_mbuf(m, hdr->csum_start,
					rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
				return;
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
		}
	}

	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		if (hdr->gso_size == 0)
			return;

		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
		case VIRTIO_NET_HDR_GSO_TCPV6:
			if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
				break;
			m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE;
			m->tso_segsz = hdr->gso_size;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
				break;
			m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE;
			m->tso_segsz = hdr->gso_size;
			break;
		default:
			break;
		}
	}
}

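/*
 * Gather a virtio-net header that is scattered across several descriptor
 * buffers into a contiguous local copy.
 */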
static __rte_noinline void
copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
		struct buf_vector *buf_vec)
{
	uint64_t len;
	uint64_t remain = sizeof(struct virtio_net_hdr);
	uint64_t src;
	uint64_t dst = (uint64_t)(uintptr_t)hdr;

	while (remain) {
		len = RTE_MIN(remain, buf_vec->buf_len);
		src = buf_vec->buf_addr;
		rte_memcpy((void *)(uintptr_t)dst,
				(void *)(uintptr_t)src, len);

		remain -= len;
		dst += len;
		buf_vec++;
	}
}

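/*
 * Copy one descriptor chain into an mbuf chain, allocating extra mbufs
 * from mbuf_pool when the packet does not fit in a single segment, and
 * apply the dequeue offloads described by the virtio-net header.
 */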
static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
		  struct buf_vector *buf_vec, uint16_t nr_vec,
		  struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
		  bool legacy_ol_flags)
{
	uint32_t buf_avail, buf_offset;
	uint64_t buf_addr, buf_len;
	uint32_t mbuf_avail, mbuf_offset;
	uint32_t cpy_len;
	struct rte_mbuf *cur = m, *prev = m;
	struct virtio_net_hdr tmp_hdr;
	struct virtio_net_hdr *hdr = NULL;
	/* A counter to avoid desc dead loop chain */
	uint16_t vec_idx = 0;
	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
	int error = 0;

	buf_addr = buf_vec[vec_idx].buf_addr;
	buf_len = buf_vec[vec_idx].buf_len;

	if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
		error = -1;
		goto out;
	}

	if (virtio_net_with_host_offload(dev)) {
		if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
			/*
			 * No luck, the virtio-net header doesn't fit
			 * in a contiguous virtual area.
			 */
			copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
			hdr = &tmp_hdr;
		} else {
			hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
		}
	}

	/*
	 * A virtio driver normally uses at least 2 desc buffers
	 * for Tx: the first for storing the header, and others
	 * for storing the data.
	 */
	if (unlikely(buf_len < dev->vhost_hlen)) {
		buf_offset = dev->vhost_hlen - buf_len;
		vec_idx++;
		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_len = buf_vec[vec_idx].buf_len;
		buf_avail = buf_len - buf_offset;
	} else if (buf_len == dev->vhost_hlen) {
		if (unlikely(++vec_idx >= nr_vec))
			goto out;
		buf_addr = buf_vec[vec_idx].buf_addr;
		buf_len = buf_vec[vec_idx].buf_len;

		buf_offset = 0;
		buf_avail = buf_len;
	} else {
		buf_offset = dev->vhost_hlen;
		buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
	}

	PRINT_PACKET(dev,
			(uintptr_t)(buf_addr + buf_offset),
			(uint32_t)buf_avail, 0);

	mbuf_offset = 0;
	mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
	while (1) {
		cpy_len = RTE_MIN(buf_avail, mbuf_avail);

		if (likely(cpy_len > MAX_BATCH_LEN ||
					vq->batch_copy_nb_elems >= vq->size ||
					(hdr && cur == m))) {
			rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
						mbuf_offset),
					(void *)((uintptr_t)(buf_addr +
						buf_offset)), cpy_len);
		} else {
			batch_copy[vq->batch_copy_nb_elems].dst =
				rte_pktmbuf_mtod_offset(cur, void *,
						mbuf_offset);
			batch_copy[vq->batch_copy_nb_elems].src =
				(void *)((uintptr_t)(buf_addr + buf_offset));
			batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
			vq->batch_copy_nb_elems++;
		}

		mbuf_avail -= cpy_len;
		mbuf_offset += cpy_len;
		buf_avail -= cpy_len;
		buf_offset += cpy_len;

		/* This buf reaches its end, get the next one */
		if (buf_avail == 0) {
			if (++vec_idx >= nr_vec)
				break;

			buf_addr = buf_vec[vec_idx].buf_addr;
			buf_len = buf_vec[vec_idx].buf_len;

			buf_offset = 0;
			buf_avail = buf_len;

			PRINT_PACKET(dev, (uintptr_t)buf_addr,
					(uint32_t)buf_avail, 0);
		}

		/*
		 * This mbuf reaches its end, get a new one
		 * to hold more data.
		 */
		if (mbuf_avail == 0) {
			cur = rte_pktmbuf_alloc(mbuf_pool);
			if (unlikely(cur == NULL)) {
				VHOST_LOG_DATA(ERR,
					"Failed to allocate memory for mbuf.\n");
				error = -1;
				goto out;
			}

			prev->next = cur;
			prev->data_len = mbuf_offset;
			m->nb_segs += 1;
			m->pkt_len += mbuf_offset;
			prev = cur;

			mbuf_offset = 0;
			mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
		}
	}

	prev->data_len = mbuf_offset;
	m->pkt_len += mbuf_offset;

	if (hdr)
		vhost_dequeue_offload(hdr, m, legacy_ol_flags);

out:

	return error;
}

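/*
 * Helpers to back an mbuf with an externally allocated data buffer,
 * used when the guest sends packets larger than the mbuf data room.
 */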
static void
virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
{
	rte_free(opaque);
}

static int
virtio_dev_extbuf_alloc(struct rte_mbuf *pkt, uint32_t size)
{
	struct rte_mbuf_ext_shared_info *shinfo = NULL;
	uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
	uint16_t buf_len;
	rte_iova_t iova;
	void *buf;

	total_len += sizeof(*shinfo) + sizeof(uintptr_t);
	total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));

	if (unlikely(total_len > UINT16_MAX))
		return -ENOSPC;

	buf_len = total_len;
	buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
	if (unlikely(buf == NULL))
		return -ENOMEM;

	/* Initialize shinfo */
	shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
						virtio_dev_extbuf_free, buf);
	if (unlikely(shinfo == NULL)) {
		rte_free(buf);
		VHOST_LOG_DATA(ERR, "Failed to init shinfo\n");
		return -1;
	}

	iova = rte_malloc_virt2iova(buf);
	rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
	rte_pktmbuf_reset_headroom(pkt);

	return 0;
}

/*
 * Prepare a host supported pktmbuf.
 */
static __rte_always_inline int
virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt,
			uint32_t data_len)
{
	if (rte_pktmbuf_tailroom(pkt) >= data_len)
		return 0;

	/* attach an external buffer if supported */
	if (dev->extbuf && !virtio_dev_extbuf_alloc(pkt, data_len))
		return 0;

	/* check if chained buffers are allowed */
	if (!dev->linearbuf)
		return 0;

	return -1;
}

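/*
 * Dequeue up to "count" packets from a split virtqueue: reserve the
 * descriptor chains, copy them into the bulk-allocated mbufs, then flush
 * the shadow used ring and kick the guest if needed.
 */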
__rte_always_inline
static uint16_t
virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
	bool legacy_ol_flags)
{
	uint16_t i;
	uint16_t free_entries;
	uint16_t dropped = 0;
	static bool allocerr_warned;

	/*
	 * The ordering between avail index and
	 * desc reads needs to be enforced.
	 */
	free_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
			vq->last_avail_idx;
	if (free_entries == 0)
		return 0;

	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);

	VHOST_LOG_DATA(DEBUG, "(%d) %s\n", dev->vid, __func__);

	count = RTE_MIN(count, MAX_PKT_BURST);
	count = RTE_MIN(count, free_entries);
	VHOST_LOG_DATA(DEBUG, "(%d) about to dequeue %u buffers\n",
			dev->vid, count);

	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
		return 0;

	for (i = 0; i < count; i++) {
		struct buf_vector buf_vec[BUF_VECTOR_MAX];
		uint16_t head_idx;
		uint32_t buf_len;
		uint16_t nr_vec = 0;
		int err;

		if (unlikely(fill_vec_buf_split(dev, vq,
						vq->last_avail_idx + i,
						&nr_vec, buf_vec,
						&head_idx, &buf_len,
						VHOST_ACCESS_RO) < 0))
			break;

		update_shadow_used_ring_split(vq, head_idx, 0);

		err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len);
		if (unlikely(err)) {
			/*
			 * mbuf allocation fails for jumbo packets when external
			 * buffer allocation is not allowed and linear buffer
			 * is required. Drop this packet.
			 */
			if (!allocerr_warned) {
				VHOST_LOG_DATA(ERR,
					"Failed mbuf alloc of size %d from %s on %s.\n",
					buf_len, mbuf_pool->name, dev->ifname);
				allocerr_warned = true;
			}
			dropped += 1;
			i++;
			break;
		}

		err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
				mbuf_pool, legacy_ol_flags);
		if (unlikely(err)) {
			if (!allocerr_warned) {
				VHOST_LOG_DATA(ERR,
					"Failed to copy desc to mbuf on %s.\n",
					dev->ifname);
				allocerr_warned = true;
			}
			dropped += 1;
			i++;
			break;
		}
	}

	if (dropped)
		rte_pktmbuf_free_bulk(&pkts[i - 1], count - i + 1);

	vq->last_avail_idx += i;

	do_data_copy_dequeue(vq);
	if (unlikely(i < count))
		vq->shadow_used_idx = i;
	if (likely(vq->shadow_used_idx)) {
		flush_shadow_used_ring_split(dev, vq);
		vhost_vring_call_split(dev, vq);
	}

	return (i - dropped);
}

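/*
 * Thin wrappers so that the legacy_ol_flags branch of the split ring
 * dequeue path is resolved at build time for both flavours.
 */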
__rte_noinline
static uint16_t
virtio_dev_tx_split_legacy(struct virtio_net *dev,
	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
	struct rte_mbuf **pkts, uint16_t count)
{
	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
}

__rte_noinline
static uint16_t
virtio_dev_tx_split_compliant(struct virtio_net *dev,
	struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
	struct rte_mbuf **pkts, uint16_t count)
{
	return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
}

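/*
 * Check that a full batch of packed descriptors is available and usable
 * (expected wrap bits, single-buffer packets, translatable addresses),
 * prepare the destination mbufs and return the buffer addresses and ids.
 */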
static __rte_always_inline int
vhost_reserve_avail_batch_packed(struct virtio_net *dev,
				 struct vhost_virtqueue *vq,
				 struct rte_mbuf **pkts,
				 uint16_t avail_idx,
				 uintptr_t *desc_addrs,
				 uint16_t *ids)
{
	bool wrap = vq->avail_wrap_counter;
	struct vring_packed_desc *descs = vq->desc_packed;
	uint64_t lens[PACKED_BATCH_SIZE];
	uint64_t buf_lens[PACKED_BATCH_SIZE];
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	uint16_t flags, i;

	if (unlikely(avail_idx & PACKED_BATCH_MASK))
		return -1;
	if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
		return -1;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		flags = descs[avail_idx + i].flags;
		if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
			     (wrap == !!(flags & VRING_DESC_F_USED)) ||
			     (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
			return -1;
	}

	rte_atomic_thread_fence(__ATOMIC_ACQUIRE);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		lens[i] = descs[avail_idx + i].len;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		desc_addrs[i] = vhost_iova_to_vva(dev, vq,
						  descs[avail_idx + i].addr,
						  &lens[i], VHOST_ACCESS_RW);
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(!desc_addrs[i]))
			return -1;
		if (unlikely((lens[i] != descs[avail_idx + i].len)))
			return -1;
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
			goto err;
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
			goto err;
	}

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		pkts[i]->pkt_len = lens[i] - buf_offset;
		pkts[i]->data_len = pkts[i]->pkt_len;
		ids[i] = descs[avail_idx + i].id;
	}

	return 0;

err:
	return -1;
}

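/*
 * Batched dequeue fast path: receive PACKED_BATCH_SIZE packets at once
 * from a packed virtqueue and record them in the shadow used ring.
 */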
static __rte_always_inline int
virtio_dev_tx_batch_packed(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct rte_mbuf **pkts,
			   bool legacy_ol_flags)
{
	uint16_t avail_idx = vq->last_avail_idx;
	uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	struct virtio_net_hdr *hdr;
	uintptr_t desc_addrs[PACKED_BATCH_SIZE];
	uint16_t ids[PACKED_BATCH_SIZE];
	uint16_t i;

	if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx,
					     desc_addrs, ids))
		return -1;

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);

	vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
			   (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
			   pkts[i]->pkt_len);

	if (virtio_net_with_host_offload(dev)) {
		vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
			vhost_dequeue_offload(hdr, pkts[i], legacy_ol_flags);
		}
	}

	if (virtio_net_is_inorder(dev))
		vhost_shadow_dequeue_batch_packed_inorder(vq,
			ids[PACKED_BATCH_SIZE - 1]);
	else
		vhost_shadow_dequeue_batch_packed(dev, vq, ids);

	vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);

	return 0;
}

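/*
 * Dequeue a single packet from a packed virtqueue into "pkts", returning
 * the buffer id and the number of descriptors it consumed.
 */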
static __rte_always_inline int
vhost_dequeue_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mempool *mbuf_pool,
			    struct rte_mbuf *pkts,
			    uint16_t *buf_id,
			    uint16_t *desc_count,
			    bool legacy_ol_flags)
{
	struct buf_vector buf_vec[BUF_VECTOR_MAX];
	uint32_t buf_len;
	uint16_t nr_vec = 0;
	int err;
	static bool allocerr_warned;

	if (unlikely(fill_vec_buf_packed(dev, vq,
					 vq->last_avail_idx, desc_count,
					 buf_vec, &nr_vec,
					 buf_id, &buf_len,
					 VHOST_ACCESS_RO) < 0))
		return -1;

	if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
		if (!allocerr_warned) {
			VHOST_LOG_DATA(ERR,
				"Failed mbuf alloc of size %d from %s on %s.\n",
				buf_len, mbuf_pool->name, dev->ifname);
			allocerr_warned = true;
		}
		return -1;
	}

	err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
				mbuf_pool, legacy_ol_flags);
	if (unlikely(err)) {
		if (!allocerr_warned) {
			VHOST_LOG_DATA(ERR,
				"Failed to copy desc to mbuf on %s.\n",
				dev->ifname);
			allocerr_warned = true;
		}
		return -1;
	}

	return 0;
}

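/*
 * Single-packet dequeue slow path: receive one packet and update the
 * shadow used ring, in order or out of order depending on the
 * negotiated features.
 */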
static __rte_always_inline int
virtio_dev_tx_single_packed(struct virtio_net *dev,
			    struct vhost_virtqueue *vq,
			    struct rte_mempool *mbuf_pool,
			    struct rte_mbuf *pkts,
			    bool legacy_ol_flags)
{
	uint16_t buf_id, desc_count = 0;
	int ret;

	ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
					  &desc_count, legacy_ol_flags);

	if (likely(desc_count > 0)) {
		if (virtio_net_is_inorder(dev))
			vhost_shadow_dequeue_single_packed_inorder(vq, buf_id,
								   desc_count);
		else
			vhost_shadow_dequeue_single_packed(vq, buf_id,
							   desc_count);

		vq_inc_last_avail_packed(vq, desc_count);
	}

	return ret;
}

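/*
 * Packed ring dequeue loop: try the batched path first, fall back to the
 * single-packet path, and free any bulk-allocated mbufs left unused.
 */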
__rte_always_inline
static uint16_t
virtio_dev_tx_packed(struct virtio_net *dev,
		     struct vhost_virtqueue *__rte_restrict vq,
		     struct rte_mempool *mbuf_pool,
		     struct rte_mbuf **__rte_restrict pkts,
		     uint32_t count,
		     bool legacy_ol_flags)
{
	uint32_t pkt_idx = 0;

	if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
		return 0;

	do {
		rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);

		if (count - pkt_idx >= PACKED_BATCH_SIZE) {
			if (!virtio_dev_tx_batch_packed(dev, vq,
							&pkts[pkt_idx],
							legacy_ol_flags)) {
				pkt_idx += PACKED_BATCH_SIZE;
				continue;
			}
		}

		if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
						pkts[pkt_idx],
						legacy_ol_flags))
			break;
		pkt_idx++;
	} while (pkt_idx < count);

	if (pkt_idx != count)
		rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx);

	if (vq->shadow_used_idx) {
		do_data_copy_dequeue(vq);

		vhost_flush_dequeue_shadow_packed(dev, vq);
		vhost_vring_call_packed(dev, vq);
	}

	return pkt_idx;
}

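/*
 * Build-time specialized wrappers of the packed ring dequeue path,
 * mirroring the split ring ones above.
 */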
__rte_noinline
static uint16_t
virtio_dev_tx_packed_legacy(struct virtio_net *dev,
	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
{
	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
}

__rte_noinline
static uint16_t
virtio_dev_tx_packed_compliant(struct virtio_net *dev,
	struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
	struct rte_mbuf **__rte_restrict pkts, uint32_t count)
{
	return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
}

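/*
 * Public entry point of the dequeue path: receive packets sent by the
 * guest on virtqueue "queue_id" into "pkts". A typical polling loop is
 * roughly as follows (a sketch only, assuming "vid", "queue_id" and
 * "mbuf_pool" were set up elsewhere):
 *
 *	struct rte_mbuf *pkts[MAX_PKT_BURST];
 *	uint16_t nb;
 *
 *	nb = rte_vhost_dequeue_burst(vid, queue_id, mbuf_pool,
 *			pkts, MAX_PKT_BURST);
 *	... process the nb mbufs, then free them ...
 */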
uint16_t
rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
	struct virtio_net *dev;
	struct rte_mbuf *rarp_mbuf = NULL;
	struct vhost_virtqueue *vq;
	int16_t success = 1;

	dev = get_device(vid);
	if (!dev)
		return 0;

	if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
		VHOST_LOG_DATA(ERR,
			"(%d) %s: built-in vhost net backend is disabled.\n",
			dev->vid, __func__);
		return 0;
	}

	if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
		VHOST_LOG_DATA(ERR,
			"(%d) %s: invalid virtqueue idx %d.\n",
			dev->vid, __func__, queue_id);
		return 0;
	}

	vq = dev->virtqueue[queue_id];

	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
		return 0;

	if (unlikely(!vq->enabled)) {
		count = 0;
		goto out_access_unlock;
	}

	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_lock(vq);

	if (unlikely(!vq->access_ok))
		if (unlikely(vring_translate(dev, vq) < 0)) {
			count = 0;
			goto out;
		}

	/*
	 * Construct a RARP broadcast packet and inject it into the "pkts"
	 * array, so that it looks like the guest actually sent it.
	 *
	 * Check user_send_rarp() for more information.
	 *
	 * broadcast_rarp shares a cacheline in the virtio_net structure
	 * with some fields that are accessed during enqueue, and
	 * __atomic_compare_exchange_n causes a write if it performs the
	 * compare and exchange. This could result in false sharing between
	 * enqueue and dequeue.
	 *
	 * Prevent unnecessary false sharing by reading broadcast_rarp first
	 * and only performing compare and exchange if the read indicates it
	 * is likely to be set.
	 */
	if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
			__atomic_compare_exchange_n(&dev->broadcast_rarp,
			&success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {

		rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
		if (rarp_mbuf == NULL) {
			VHOST_LOG_DATA(ERR, "Failed to make RARP packet.\n");
			count = 0;
			goto out;
		}
		/*
		 * Inject it at the head of the "pkts" array, so that the
		 * switch's MAC learning table gets updated first.
		 */
		pkts[0] = rarp_mbuf;
		pkts++;
		count -= 1;
	}

	if (vq_is_packed(dev)) {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
		else
			count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
	} else {
		if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
			count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
		else
			count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
	}

out:
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		vhost_user_iotlb_rd_unlock(vq);

out_access_unlock:
	rte_spinlock_unlock(&vq->access_lock);

	if (unlikely(rarp_mbuf != NULL))
		count += 1;

	return count;
}