vhost: support indirect Tx descriptors
Indirect descriptors are usually supported by virtio-net devices, allowing a larger number of requests to be dispatched.

When the virtio device sends a packet using indirect descriptors, only one slot is used in the ring, even for large packets. The main effect is to improve the 0% packet loss benchmark: a PVP benchmark using Moongen (64 bytes) on the TE, and testpmd (fwd io for the host, macswap for the VM) on the DUT, shows a +50% gain for zero loss.

On the downside, a micro-benchmark using testpmd txonly in the VM and rxonly on the host shows a loss between 1 and 4%. Depending on the needs, the feature can be disabled at VM boot time by passing the indirect_desc=off argument to the vhost-user device in QEMU.

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
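For readers unfamiliar with the mechanism, here is a minimal C sketch of the two layouts being compared. The struct mirrors the standard split-ring vring_desc used throughout this patch; the addresses, lengths and the two-entry chain are made-up placeholders, not values taken from the patch:

	#include <stdint.h>

	/* Standard split-ring descriptor, as in the virtio spec / vhost headers. */
	struct vring_desc {
		uint64_t addr;   /* guest-physical address of the buffer (or of the indirect table) */
		uint32_t len;    /* buffer length (or size of the indirect table in bytes) */
		uint16_t flags;  /* VRING_DESC_F_NEXT / _WRITE / _INDIRECT */
		uint16_t next;   /* index of the chained descriptor, if F_NEXT is set */
	};

	#define VRING_DESC_F_NEXT      1
	#define VRING_DESC_F_INDIRECT  4

	/* Without the feature: two ring slots per packet (virtio-net header + data). */
	struct vring_desc two_slot_chain[2] = {
		{ .addr = 0x1000, .len = 12, .flags = VRING_DESC_F_NEXT, .next = 1 }, /* virtio-net header */
		{ .addr = 0x2000, .len = 64, .flags = 0,                 .next = 0 }, /* frame data */
	};

	/* With the feature: the same chain lives in a guest-allocated indirect table,
	 * and the ring itself carries only one descriptor pointing at that table. */
	struct vring_desc indirect_table[2] = {
		{ .addr = 0x1000, .len = 12, .flags = VRING_DESC_F_NEXT, .next = 1 },
		{ .addr = 0x2000, .len = 64, .flags = 0,                 .next = 0 },
	};

	struct vring_desc ring_slot = {
		.addr  = 0x3000,                 /* guest-physical address of indirect_table[] (placeholder) */
		.len   = sizeof(indirect_table),
		.flags = VRING_DESC_F_INDIRECT,
		.next  = 0,
	};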
commit 2304dd73d2
parent 07c9d24e9d
@@ -36,6 +36,18 @@ New Features
    This section is a comment. Make sure to start the actual text at the margin.
 
+* **Added vhost-user indirect descriptors support.**
+
+  If indirect descriptor feature is negotiated, each packet sent by the guest
+  will take exactly one slot in the enqueue virtqueue. Without the feature, in
+  current version, even 64 bytes packets take two slots with Virtio PMD on guest
+  side.
+
+  The main impact is better performance for 0% packet loss use-cases, as it
+  behaves as if the virtqueue size was enlarged, so more packets can be buffered
+  in case of system perturbations. On the downside, small performance degradation
+  is measured when running micro-benchmarks.
+
 
 Resolved Issues
 ---------------
 
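A rough illustration of the "enlarged virtqueue" effect described in the note above; the 256-entry Tx queue size is an assumption made for this example, not something fixed by the patch:

	/* Rough illustration only; 256 is an assumed Tx virtqueue size. */
	#define VQ_SIZE           256
	#define SLOTS_PER_PKT_OLD 2   /* header descriptor + data descriptor, as described above */
	#define SLOTS_PER_PKT_IND 1   /* one indirect slot per packet */

	/* Packets that can sit in the virtqueue during a system perturbation. */
	static const unsigned int bufferable_old = VQ_SIZE / SLOTS_PER_PKT_OLD; /* 128 */
	static const unsigned int bufferable_ind = VQ_SIZE / SLOTS_PER_PKT_IND; /* 256 */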
@@ -65,7 +65,8 @@
 				(1ULL << VIRTIO_NET_F_CSUM) | \
 				(1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
 				(1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
-				(1ULL << VIRTIO_NET_F_GUEST_TSO6))
+				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+				(1ULL << VIRTIO_RING_F_INDIRECT_DESC))
 
 uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
 
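Side note on deployment: besides the QEMU indirect_desc=off knob mentioned in the commit message, a host application built against this DPDK generation can drop the bit from the advertised set before starting the vhost driver. A sketch, assuming the rte_vhost_feature_disable()/rte_vhost_feature_get() calls from rte_virtio_net.h of this era are the appropriate entry points (they are not part of this patch):

	#include <linux/virtio_ring.h>   /* VIRTIO_RING_F_INDIRECT_DESC */
	#include <rte_virtio_net.h>

	static void disable_indirect_desc(void)
	{
		/* Mask the bit out of the library-wide advertised feature set. */
		rte_vhost_feature_disable(1ULL << VIRTIO_RING_F_INDIRECT_DESC);

		/* The remaining advertised bits can be inspected if needed. */
		uint64_t features = rte_vhost_feature_get();
		(void)features;
	}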
@@ -679,8 +679,8 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
 }
 
 static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-		  struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+		  uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
 		  struct rte_mempool *mbuf_pool)
 {
 	struct vring_desc *desc;
@@ -693,8 +693,9 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	/* A counter to avoid desc dead loop chain */
 	uint32_t nr_desc = 1;
 
-	desc = &vq->desc[desc_idx];
-	if (unlikely(desc->len < dev->vhost_hlen))
+	desc = &descs[desc_idx];
+	if (unlikely((desc->len < dev->vhost_hlen)) ||
+			(desc->flags & VRING_DESC_F_INDIRECT))
 		return -1;
 
 	desc_addr = gpa_to_vva(dev, desc->addr);
@@ -711,7 +712,9 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	 */
 	if (likely((desc->len == dev->vhost_hlen) &&
 		   (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-		desc = &vq->desc[desc->next];
+		desc = &descs[desc->next];
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+			return -1;
 
 		desc_addr = gpa_to_vva(dev, desc->addr);
 		if (unlikely(!desc_addr))
@@ -747,10 +750,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		if ((desc->flags & VRING_DESC_F_NEXT) == 0)
 			break;
 
-		if (unlikely(desc->next >= vq->size ||
-			     ++nr_desc > vq->size))
+		if (unlikely(desc->next >= max_desc ||
+			     ++nr_desc > max_desc))
 			return -1;
-		desc = &vq->desc[desc->next];
+		desc = &descs[desc->next];
+		if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+			return -1;
 
 		desc_addr = gpa_to_vva(dev, desc->addr);
 		if (unlikely(!desc_addr))
@@ -878,19 +883,35 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	/* Prefetch descriptor index. */
 	rte_prefetch0(&vq->desc[desc_indexes[0]]);
 	for (i = 0; i < count; i++) {
+		struct vring_desc *desc;
+		uint16_t sz, idx;
 		int err;
 
 		if (likely(i + 1 < count))
 			rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
 
+		if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+			desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+					vq->desc[desc_indexes[i]].addr);
+			if (unlikely(!desc))
+				break;
+
+			rte_prefetch0(desc);
+			sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+			idx = 0;
+		} else {
+			desc = vq->desc;
+			sz = vq->size;
+			idx = desc_indexes[i];
+		}
+
 		pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
 		if (unlikely(pkts[i] == NULL)) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
 			break;
 		}
-		err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
-					mbuf_pool);
+		err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
 		if (unlikely(err)) {
 			rte_pktmbuf_free(pkts[i]);
 			break;
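For context on where this dequeue path is exercised, a hedged caller-side sketch follows (not part of the patch): vid, the mempool and the burst size are application-supplied placeholders, and VIRTIO_TXQ is the guest Tx queue index from rte_virtio_net.h. Indirect descriptors, when negotiated, are resolved transparently inside rte_vhost_dequeue_burst() by the code above.

	#include <rte_mbuf.h>
	#include <rte_virtio_net.h>

	#define BURST_SIZE 32

	/* Poll the guest Tx ring of one vhost-user device and return the
	 * number of mbufs pulled out of it. */
	static uint16_t poll_guest_tx(int vid, struct rte_mempool *mbuf_pool,
				      struct rte_mbuf **pkts)
	{
		return rte_vhost_dequeue_burst(vid, VIRTIO_TXQ, mbuf_pool,
					       pkts, BURST_SIZE);
	}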