vhost: support indirect Tx descriptors

Indirect descriptors are usually supported by virtio-net devices,
allowing a larger number of requests to be dispatched.

When the virtio device sends a packet using indirect descriptors,
only one slot is used in the ring, even for large packets.
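
For illustration, here is a minimal standalone sketch (simplified types, not
the DPDK sources; gpa_to_vva() is passed as a callback purely for the example)
of how one ring slot flagged VRING_DESC_F_INDIRECT resolves to a whole
descriptor table, mirroring what the dequeue path in this patch does:

    /* Minimal sketch, not the DPDK code itself: simplified virtio types. */
    #include <stdint.h>
    #include <stddef.h>

    #define VRING_DESC_F_NEXT     1
    #define VRING_DESC_F_INDIRECT 4

    struct vring_desc {
            uint64_t addr;  /* guest-physical address of the buffer */
            uint32_t len;   /* buffer length in bytes */
            uint16_t flags; /* NEXT, WRITE, INDIRECT, ... */
            uint16_t next;  /* index of the next chained descriptor */
    };

    /* Resolve the descriptor table backing one ring slot. With INDIRECT,
     * the slot's buffer is itself an array of descriptors, so a packet
     * chain of any length consumes a single ring entry. */
    static const struct vring_desc *
    slot_to_desc_table(const struct vring_desc *ring, uint16_t ring_size,
                       uint16_t slot, uint16_t *table_size, uint16_t *start,
                       void *(*gpa_to_vva)(uint64_t gpa))
    {
            if (ring[slot].flags & VRING_DESC_F_INDIRECT) {
                    *table_size = ring[slot].len / sizeof(struct vring_desc);
                    *start = 0;
                    return gpa_to_vva(ring[slot].addr);
            }
            /* Direct descriptors: the chain lives in the ring itself. */
            *table_size = ring_size;
            *start = slot;
            return ring;
    }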

The main effect is to improve the 0% packet loss benchmark.
A PVP benchmark using MoonGen (64-byte packets) on the TE, and testpmd
(fwd io for the host, macswap for the VM) on the DUT, shows a +50% gain
at zero loss.

On the downside, a micro-benchmark using testpmd txonly in the VM and
rxonly on the host shows a loss of between 1 and 4%. Depending on the
use case, the feature can be disabled at VM boot time by passing the
indirect_desc=off argument to the vhost-user device in QEMU.
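
As an example, a vhost-user port could be instantiated with the feature
turned off roughly as follows (socket path, ids and the omitted shared-memory
backend options are illustrative, not part of this patch):

    qemu-system-x86_64 ... \
        -chardev socket,id=char0,path=/tmp/vhost-user.sock \
        -netdev type=vhost-user,id=net0,chardev=char0 \
        -device virtio-net-pci,netdev=net0,indirect_desc=off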

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Authored by Maxime Coquelin on 2016-09-27 10:42:49 +02:00; committed by Yuanhan Liu
parent 07c9d24e9d
commit 2304dd73d2
3 changed files with 45 additions and 11 deletions


@@ -36,6 +36,18 @@ New Features
      This section is a comment. Make sure to start the actual text at the margin.
 
+* **Added vhost-user indirect descriptors support.**
+
+  If the indirect descriptor feature is negotiated, each packet sent by the guest
+  will take exactly one slot in the enqueue virtqueue. Without the feature, in the
+  current version, even 64-byte packets take two slots with the Virtio PMD on the
+  guest side.
+
+  The main impact is better performance for 0% packet loss use cases, as it
+  behaves as if the virtqueue size was enlarged, so more packets can be buffered
+  in case of system perturbations. On the downside, a small performance degradation
+  is measured when running micro-benchmarks.
+
 Resolved Issues
 ---------------


@@ -65,7 +65,8 @@
                 (1ULL << VIRTIO_NET_F_CSUM) | \
                 (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \
                 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
-                (1ULL << VIRTIO_NET_F_GUEST_TSO6))
+                (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+                (1ULL << VIRTIO_RING_F_INDIRECT_DESC))
 
 uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
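
The hunk above adds VIRTIO_RING_F_INDIRECT_DESC to the default feature set.
For completeness, a minimal sketch of how a vhost-user application could mask
the bit from its own side instead of (or in addition to) the QEMU command
line, assuming the rte_vhost_feature_disable() API available in DPDK releases
of this era (the API has since changed):

    #include <linux/virtio_ring.h>
    #include <rte_virtio_net.h>

    static void
    mask_indirect_descriptors(void)
    {
            /* Remove VIRTIO_RING_F_INDIRECT_DESC from the advertised
             * features so it is never negotiated with the guest. */
            rte_vhost_feature_disable(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
    }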


@@ -679,8 +679,8 @@ make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac)
 }
 
 static inline int __attribute__((always_inline))
-copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
-                  struct rte_mbuf *m, uint16_t desc_idx,
+copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
+                  uint16_t max_desc, struct rte_mbuf *m, uint16_t desc_idx,
                   struct rte_mempool *mbuf_pool)
 {
         struct vring_desc *desc;
@@ -693,8 +693,9 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
         /* A counter to avoid desc dead loop chain */
         uint32_t nr_desc = 1;
 
-        desc = &vq->desc[desc_idx];
-        if (unlikely(desc->len < dev->vhost_hlen))
+        desc = &descs[desc_idx];
+        if (unlikely((desc->len < dev->vhost_hlen)) ||
+                        (desc->flags & VRING_DESC_F_INDIRECT))
                 return -1;
 
         desc_addr = gpa_to_vva(dev, desc->addr);
@@ -711,7 +712,9 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
          */
         if (likely((desc->len == dev->vhost_hlen) &&
                    (desc->flags & VRING_DESC_F_NEXT) != 0)) {
-                desc = &vq->desc[desc->next];
+                desc = &descs[desc->next];
+                if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+                        return -1;
 
                 desc_addr = gpa_to_vva(dev, desc->addr);
                 if (unlikely(!desc_addr))
@@ -747,10 +750,12 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
                 if ((desc->flags & VRING_DESC_F_NEXT) == 0)
                         break;
 
-                if (unlikely(desc->next >= vq->size ||
-                             ++nr_desc > vq->size))
+                if (unlikely(desc->next >= max_desc ||
+                             ++nr_desc > max_desc))
                         return -1;
+                desc = &descs[desc->next];
+                if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
+                        return -1;
 
-                desc = &vq->desc[desc->next];
                 desc_addr = gpa_to_vva(dev, desc->addr);
                 if (unlikely(!desc_addr))
@@ -878,19 +883,35 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
         /* Prefetch descriptor index. */
         rte_prefetch0(&vq->desc[desc_indexes[0]]);
         for (i = 0; i < count; i++) {
+                struct vring_desc *desc;
+                uint16_t sz, idx;
                 int err;
 
                 if (likely(i + 1 < count))
                         rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
 
+                if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+                        desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
+                                        vq->desc[desc_indexes[i]].addr);
+                        if (unlikely(!desc))
+                                break;
+
+                        rte_prefetch0(desc);
+
+                        sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
+                        idx = 0;
+                } else {
+                        desc = vq->desc;
+                        sz = vq->size;
+                        idx = desc_indexes[i];
+                }
+
                 pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
                 if (unlikely(pkts[i] == NULL)) {
                         RTE_LOG(ERR, VHOST_DATA,
                                 "Failed to allocate memory for mbuf.\n");
                         break;
                 }
-                err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i],
-                                        mbuf_pool);
+
+                err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
                 if (unlikely(err)) {
                         rte_pktmbuf_free(pkts[i]);
                         break;