vhost: add a flag to enable dequeue zero copy
Dequeue zero copy is disabled by default. Here add a new flag ``RTE_VHOST_USER_DEQUEUE_ZERO_COPY`` to explicitly enable it. Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Tested-by: Qian Xu <qian.q.xu@intel.com>
This commit is contained in:
parent
b0a985d1f3
commit
9ba1e744ab
@ -79,7 +79,7 @@ The following is an overview of the Vhost API functions:
|
||||
``/dev/path`` character device file will be created. For vhost-user server
|
||||
mode, a Unix domain socket file ``path`` will be created.
|
||||
|
||||
Currently two flags are supported (these are valid for vhost-user only):
|
||||
Currently supported flags are (these are valid for vhost-user only):
|
||||
|
||||
- ``RTE_VHOST_USER_CLIENT``
|
||||
|
||||
@ -97,6 +97,38 @@ The following is an overview of the Vhost API functions:
|
||||
This reconnect option is enabled by default. However, it can be turned off
|
||||
by setting this flag.
|
||||
|
||||
- ``RTE_VHOST_USER_DEQUEUE_ZERO_COPY``
|
||||
|
||||
Dequeue zero copy will be enabled when this flag is set. It is disabled by
|
||||
default.
|
||||
|
||||
There are some truths (including limitations) you might want to know while
|
||||
setting this flag:
|
||||
|
||||
* zero copy is not good for small packets (typically for packet size below
|
||||
512).
|
||||
|
||||
* zero copy is really good for VM2VM case. For iperf between two VMs, the
|
||||
boost could be above 70% (when TSO is enabled).
|
||||
|
||||
* for VM2NIC case, the ``nb_tx_desc`` has to be small enough: <= 64 if virtio
|
||||
indirect feature is not enabled and <= 128 if it is enabled.
|
||||
|
||||
This is because when dequeue zero copy is enabled, guest Tx used vring will
|
||||
be updated only when corresponding mbuf is freed. Thus, the nb_tx_desc
|
||||
has to be small enough so that the PMD driver will run out of available
|
||||
Tx descriptors and free mbufs timely. Otherwise, guest Tx vring would be
|
||||
starved.
|
||||
|
||||
* Guest memory should be backed by huge pages to achieve better
|
||||
performance. Using 1G page size is the best.
|
||||
|
||||
When dequeue zero copy is enabled, the guest phys address and host phys
|
||||
address mapping has to be established. Using non-huge pages means far
|
||||
more page segments. To make it simple, DPDK vhost does a linear search
|
||||
of those segments, thus the fewer the segments, the quicker we will get
|
||||
the mapping. NOTE: we may speed it up by using tree searching in the future.
|
||||
|
||||
* ``rte_vhost_driver_session_start()``
|
||||
|
||||
This function starts the vhost session loop to handle vhost messages. It
|
||||
|
@ -45,6 +45,18 @@ New Features
|
||||
in an mbuf chain and retrieve its packet type by software.
|
||||
* Added new functions ``rte_get_ptype_*()`` to dump a packet type as a string.
|
||||
|
||||
* **Added vhost-user dequeue zero copy support.**
|
||||
|
||||
The copy in dequeue path is saved, which is meant to improve the performance.
|
||||
In the VM2VM case, the boost is quite impressive. The bigger the packet size,
|
||||
the bigger performance boost you may get. However, for VM2NIC case, there
|
||||
are some limitations, and the boost is not as impressive as in the VM2VM case.
|
||||
It may even drop quite a bit for small packets.
|
||||
|
||||
For such reason, this feature is disabled by default. It can be enabled when
|
||||
``RTE_VHOST_USER_DEQUEUE_ZERO_COPY`` flag is given. Check the vhost section
|
||||
at programming guide for more information.
|
||||
|
||||
* **Added vhost-user indirect descriptors support.**
|
||||
|
||||
If indirect descriptor feature is negotiated, each packet sent by the guest
|
||||
|
@ -53,6 +53,7 @@
|
||||
|
||||
#define RTE_VHOST_USER_CLIENT (1ULL << 0)
|
||||
#define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
|
||||
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
|
||||
|
||||
/* Enum for virtqueue management. */
|
||||
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
|
||||
|
@ -62,6 +62,7 @@ struct vhost_user_socket {
|
||||
int connfd;
|
||||
bool is_server;
|
||||
bool reconnect;
|
||||
bool dequeue_zero_copy;
|
||||
};
|
||||
|
||||
struct vhost_user_connection {
|
||||
@ -203,6 +204,9 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
|
||||
size = strnlen(vsocket->path, PATH_MAX);
|
||||
vhost_set_ifname(vid, vsocket->path, size);
|
||||
|
||||
if (vsocket->dequeue_zero_copy)
|
||||
vhost_enable_dequeue_zero_copy(vid);
|
||||
|
||||
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
|
||||
|
||||
vsocket->connfd = fd;
|
||||
@ -499,6 +503,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
|
||||
memset(vsocket, 0, sizeof(struct vhost_user_socket));
|
||||
vsocket->path = strdup(path);
|
||||
vsocket->connfd = -1;
|
||||
vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
|
||||
|
||||
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
|
||||
vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
|
||||
|
@ -291,6 +291,16 @@ vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
|
||||
dev->ifname[sizeof(dev->ifname) - 1] = '\0';
|
||||
}
|
||||
|
||||
void
|
||||
vhost_enable_dequeue_zero_copy(int vid)
|
||||
{
|
||||
struct virtio_net *dev = get_device(vid);
|
||||
|
||||
if (dev == NULL)
|
||||
return;
|
||||
|
||||
dev->dequeue_zero_copy = 1;
|
||||
}
|
||||
|
||||
int
|
||||
rte_vhost_get_numa_node(int vid)
|
||||
|
@ -278,6 +278,7 @@ void vhost_destroy_device(int);
|
||||
int alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx);
|
||||
|
||||
void vhost_set_ifname(int, const char *if_name, unsigned int if_len);
|
||||
void vhost_enable_dequeue_zero_copy(int vid);
|
||||
|
||||
/*
|
||||
* Backend-specific cleanup. Defined by vhost-cuse and vhost-user.
|
||||
|
Loading…
x
Reference in New Issue
Block a user