diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
index af40679496..c3c14816bb 100644
--- a/lib/librte_vhost/rte_vdpa.h
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -173,4 +173,43 @@ rte_vdpa_get_device_num(void);
  */
 int __rte_experimental
 rte_vhost_host_notifier_ctrl(int vid, bool enable);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Synchronize the available ring from guest to mediated ring, help to
+ * check desc validity to protect against malicious guest driver.
+ *
+ * @param vid
+ *  vhost device id
+ * @param qid
+ *  vhost queue id
+ * @param vring_m
+ *  mediated virtio ring pointer
+ * @return
+ *  number of synced available entries on success, -1 on failure
+ */
+int __rte_experimental
+rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Synchronize the used ring from mediated ring to guest, log dirty
+ * page for each writeable buffer, caller should handle the used
+ * ring logging before device stop.
+ *
+ * @param vid
+ *  vhost device id
+ * @param qid
+ *  vhost queue id
+ * @param vring_m
+ *  mediated virtio ring pointer
+ * @return
+ *  number of synced used entries on success, -1 on failure
+ */
+int __rte_experimental
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m);
 #endif /* _RTE_VDPA_H_ */
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 22302e9729..dd3b4c1cbf 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -84,4 +84,6 @@ EXPERIMENTAL {
 	rte_vhost_crypto_set_zero_copy;
 	rte_vhost_va_from_guest_pa;
 	rte_vhost_host_notifier_ctrl;
+	rte_vdpa_relay_vring_avail;
+	rte_vdpa_relay_vring_used;
 };
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
index e7d849ee0f..240a1fe3a1 100644
--- a/lib/librte_vhost/vdpa.c
+++ b/lib/librte_vhost/vdpa.c
@@ -122,3 +122,244 @@ rte_vdpa_get_device_num(void)
 {
 	return vdpa_device_num;
 }
+
+/*
+ * Check that the whole [desc_iova, desc_iova + desc_len) range can be
+ * translated with the requested permission, one chunk per
+ * vhost_iova_to_vva() call.
+ *
+ * Returns true if the range is invalid, false if every chunk translates.
+ */
+static bool
+invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		   uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
+{
+	uint64_t desc_addr, desc_chunk_len;
+
+	while (desc_len) {
+		desc_chunk_len = desc_len;
+		desc_addr = vhost_iova_to_vva(dev, vq,
+					      desc_iova,
+					      &desc_chunk_len,
+					      perm);
+
+		if (!desc_addr)
+			return true;
+
+		desc_len -= desc_chunk_len;
+		desc_iova += desc_chunk_len;
+	}
+
+	return false;
+}
+
+int __rte_experimental
+rte_vdpa_relay_vring_avail(int vid, uint16_t qid, void *vring_m)
+{
+	struct virtio_net *dev = get_device(vid);
+	uint16_t idx, idx_m, desc_id;
+	struct vring_desc desc;
+	struct vhost_virtqueue *vq;
+	struct vring_desc *desc_ring;
+	struct vring_desc *idesc = NULL;
+	struct vring *s_vring;
+	uint64_t dlen;
+	uint32_t nr_descs, budget;
+	int ret;
+	uint8_t perm;
+
+	if (!dev || !vring_m)
+		return -1;
+
+	if (qid >= dev->nr_vring)
+		return -1;
+
+	if (vq_is_packed(dev))
+		return -1;
+
+	s_vring = (struct vring *)vring_m;
+	vq = dev->virtqueue[qid];
+	idx = vq->avail->idx;
+	idx_m = s_vring->avail->idx;
+	/* 16-bit subtraction stays correct when the ring index wraps */
+	ret = (uint16_t)(idx - idx_m);
+
+	while (idx_m != idx) {
+		/* avail entry copy */
+		desc_id = vq->avail->ring[idx_m & (vq->size - 1)];
+		/* head index comes from the guest ring: range-check it */
+		if (unlikely(desc_id >= vq->size))
+			goto fail;
+
+		s_vring->avail->ring[idx_m & (vq->size - 1)] = desc_id;
+		desc_ring = vq->desc;
+		nr_descs = vq->size;
+
+		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
+			/*
+			 * Indirect descriptor: validate the entries of the
+			 * indirect table. If the table does not translate
+			 * as one chunk, work on a private copy of it.
+			 */
+			dlen = vq->desc[desc_id].len;
+			nr_descs = dlen / sizeof(struct vring_desc);
+			desc_ring = (struct vring_desc *)(uintptr_t)
+				vhost_iova_to_vva(dev, vq,
+						vq->desc[desc_id].addr, &dlen,
+						VHOST_ACCESS_RO);
+			if (unlikely(!desc_ring))
+				goto fail;
+
+			if (unlikely(dlen < vq->desc[desc_id].len)) {
+				idesc = alloc_copy_ind_table(dev, vq,
+						vq->desc[desc_id].addr,
+						vq->desc[desc_id].len);
+				if (unlikely(!idesc))
+					goto fail;
+
+				desc_ring = idesc;
+			}
+
+			desc_id = 0;
+		}
+
+		/* check if the buf addr is within the guest memory */
+		budget = nr_descs;
+		do {
+			/* stay inside the table and stop on looping chains */
+			if (unlikely(desc_id >= nr_descs || budget-- == 0))
+				goto fail;
+
+			desc = desc_ring[desc_id];
+			perm = desc.flags & VRING_DESC_F_WRITE ?
+				VHOST_ACCESS_WO : VHOST_ACCESS_RO;
+			if (invalid_desc_check(dev, vq, desc.addr, desc.len,
+						perm))
+				goto fail;
+			desc_id = desc.next;
+		} while (desc.flags & VRING_DESC_F_NEXT);
+
+		if (unlikely(idesc)) {
+			free_ind_table(idesc);
+			idesc = NULL;
+		}
+
+		idx_m++;
+	}
+
+	/* publish the copied entries before the new avail index */
+	rte_smp_wmb();
+	s_vring->avail->idx = idx;
+
+	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+		vhost_avail_event(vq) = idx;
+
+	return ret;
+
+fail:
+	if (unlikely(idesc))
+		free_ind_table(idesc);
+	return -1;
+}
+
+int __rte_experimental
+rte_vdpa_relay_vring_used(int vid, uint16_t qid, void *vring_m)
+{
+	struct virtio_net *dev = get_device(vid);
+	uint16_t idx, idx_m, desc_id;
+	struct vhost_virtqueue *vq;
+	struct vring_desc desc;
+	struct vring_desc *desc_ring;
+	struct vring_desc *idesc = NULL;
+	struct vring *s_vring;
+	uint64_t dlen;
+	uint32_t nr_descs, budget;
+	int ret;
+
+	if (!dev || !vring_m)
+		return -1;
+
+	if (qid >= dev->nr_vring)
+		return -1;
+
+	if (vq_is_packed(dev))
+		return -1;
+
+	s_vring = (struct vring *)vring_m;
+	vq = dev->virtqueue[qid];
+	idx = vq->used->idx;
+	idx_m = s_vring->used->idx;
+	/* 16-bit subtraction stays correct when the ring index wraps */
+	ret = (uint16_t)(idx_m - idx);
+
+	while (idx != idx_m) {
+		/* copy used entry, used ring logging is not covered here */
+		vq->used->ring[idx & (vq->size - 1)] =
+			s_vring->used->ring[idx & (vq->size - 1)];
+
+		desc_id = vq->used->ring[idx & (vq->size - 1)].id;
+		/* the id must name a descriptor of this ring */
+		if (unlikely(desc_id >= vq->size))
+			goto fail;
+
+		desc_ring = vq->desc;
+		nr_descs = vq->size;
+
+		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
+			dlen = vq->desc[desc_id].len;
+			nr_descs = dlen / sizeof(struct vring_desc);
+			desc_ring = (struct vring_desc *)(uintptr_t)
+				vhost_iova_to_vva(dev, vq,
+						vq->desc[desc_id].addr, &dlen,
+						VHOST_ACCESS_RO);
+			if (unlikely(!desc_ring))
+				goto fail;
+
+			if (unlikely(dlen < vq->desc[desc_id].len)) {
+				idesc = alloc_copy_ind_table(dev, vq,
+						vq->desc[desc_id].addr,
+						vq->desc[desc_id].len);
+				if (unlikely(!idesc))
+					goto fail;
+
+				desc_ring = idesc;
+			}
+
+			desc_id = 0;
+		}
+
+		/* dirty page logging for DMA writeable buffer */
+		budget = nr_descs;
+		do {
+			/* stay inside the table and stop on looping chains */
+			if (unlikely(desc_id >= nr_descs || budget-- == 0))
+				goto fail;
+
+			desc = desc_ring[desc_id];
+			if (desc.flags & VRING_DESC_F_WRITE)
+				vhost_log_write(dev, desc.addr, desc.len);
+			desc_id = desc.next;
+		} while (desc.flags & VRING_DESC_F_NEXT);
+
+		if (unlikely(idesc)) {
+			free_ind_table(idesc);
+			idesc = NULL;
+		}
+
+		idx++;
+	}
+
+	/* publish the copied entries before the new used index */
+	rte_smp_wmb();
+	vq->used->idx = idx_m;
+
+	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
+		vring_used_event(s_vring) = idx_m;
+
+	return ret;
+
+fail:
+	if (unlikely(idesc))
+		free_ind_table(idesc);
+	return -1;
+}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d5bab4803a..3b3265c4b6 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -18,6 +18,7 @@
 #include <rte_log.h>
 #include <rte_ether.h>
 #include <rte_rwlock.h>
+#include <rte_malloc.h>
 
 #include "rte_vhost.h"
 #include "rte_vdpa.h"
@@ -754,4 +755,52 @@ kick:
 		eventfd_write(vq->callfd, (eventfd_t)1);
 }
 
+/*
+ * Copy an indirect descriptor table of desc_len bytes located at guest
+ * address desc_addr into a newly allocated buffer, translating it chunk
+ * by chunk with vhost_iova_to_vva().
+ *
+ * Returns the copy, to be released with free_ind_table(), or NULL when
+ * the allocation or the translation of any chunk fails.
+ */
+static __rte_always_inline void *
+alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
+		     uint64_t desc_addr, uint64_t desc_len)
+{
+	void *idesc;
+	uint64_t src, dst;
+	uint64_t len, remain = desc_len;
+
+	idesc = rte_malloc(__func__, desc_len, 0);
+	if (unlikely(!idesc))
+		return NULL;
+
+	dst = (uint64_t)(uintptr_t)idesc;
+
+	while (remain) {
+		len = remain;
+		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
+				VHOST_ACCESS_RO);
+		if (unlikely(!src || !len)) {
+			rte_free(idesc);
+			return NULL;
+		}
+
+		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+		remain -= len;
+		dst += len;
+		desc_addr += len;
+	}
+
+	return idesc;
+}
+
+/* Release a table obtained from alloc_copy_ind_table(). */
+static __rte_always_inline void
+free_ind_table(void *idesc)
+{
+	rte_free(idesc);
+}
+
 #endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 5e1a1a727c..8c657a1013 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -37,45 +37,6 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
 	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
 }
 
-static __rte_always_inline void *
-alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
-					 uint64_t desc_addr, uint64_t desc_len)
-{
-	void *idesc;
-	uint64_t src, dst;
-	uint64_t len, remain = desc_len;
-
-	idesc = rte_malloc(__func__, desc_len, 0);
-	if (unlikely(!idesc))
-		return 0;
-
-	dst = (uint64_t)(uintptr_t)idesc;
-
-	while (remain) {
-		len = remain;
-		src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
-				VHOST_ACCESS_RO);
-		if (unlikely(!src || !len)) {
-			rte_free(idesc);
-			return 0;
-		}
-
-		rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
-
-		remain -= len;
-		dst += len;
-		desc_addr += len;
-	}
-
-	return idesc;
-}
-
-static __rte_always_inline void
-free_ind_table(void *idesc)
-{
-	rte_free(idesc);
-}
-
 static __rte_always_inline void
 do_flush_shadow_used_ring_split(struct virtio_net *dev,
 			struct vhost_virtqueue *vq,