/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048

/* Take an entry from the pre-allocated free list, or NULL if exhausted. */
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

/* Return an entry to the free list for later reuse. */
static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
	struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);

static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

/* Return true if a miss request for this (iova, perm) is already pending. */
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
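/*
 * Illustrative sketch (not built): how a caller is expected to combine the
 * cache and the pending list on a translation miss.  This paraphrases the
 * flow of __vhost_iova_to_vva() in vhost.c; example_iova_to_vva() and the
 * surrounding error handling are assumptions for illustration only, not
 * part of this file.
 */
#if 0
static uint64_t
example_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
		uint64_t iova, uint64_t *len, uint8_t perm)
{
	uint64_t vva, tmp_size = *len;

	vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
	if (tmp_size == *len)
		return vva;

	/* Partial or no translation: request the missing part, only once. */
	iova += tmp_size;
	if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
		vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
		if (vhost_user_iotlb_miss(dev, iova, perm))
			vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
	}

	return 0;
}
#endif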
/*
 * Evict one randomly chosen entry from the cache.  Callers must ensure the
 * cache is not empty, as the modulo below would otherwise divide by zero.
 */
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
					uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		}
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

/*
 * Translate iova to a host virtual address, walking contiguous cache
 * entries.  On return, *size holds the number of bytes actually mapped
 * starting at the returned address.
 */
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
					uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

void
vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(vq);
	vhost_user_iotlb_pending_remove_all(vq);
}
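/*
 * Illustrative sketch (not built): vhost_user_iotlb_cache_find() merges
 * iova-contiguous cache entries and truncates *size when only part of the
 * requested range is mapped.  The addresses below are made up for the
 * example, and example_cache_find_partial() is not part of this file.
 */
#if 0
static void
example_cache_find_partial(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	/* Made-up host addresses; the two mappings are iova-contiguous. */
	uint64_t uaddr0 = 0x7f0000000000ULL, uaddr1 = uaddr0 + 0x1000;
	uint64_t size = 0x2000, vva;

	/* Two contiguous mappings: [0x1000, 0x2000) and [0x2000, 0x3000). */
	vhost_user_iotlb_cache_insert(dev, vq, 0x1000, uaddr0, 0x1000, VHOST_ACCESS_RW);
	vhost_user_iotlb_cache_insert(dev, vq, 0x2000, uaddr1, 0x1000, VHOST_ACCESS_RW);

	vva = vhost_user_iotlb_cache_find(vq, 0x1800, &size, VHOST_ACCESS_RO);

	/*
	 * vva == uaddr0 + 0x800, and size is truncated from 0x2000 to
	 * 0x1800: 0x800 bytes left in the first entry plus 0x1000 from
	 * the second, since nothing is cached at iova 0x3000 and beyond.
	 */
}
#endif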
int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	if (dev->flags & VIRTIO_DEV_SUPPORT_IOMMU) {
		vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
			sizeof(struct vhost_iotlb_entry), 0, socket);
		if (!vq->iotlb_pool) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
				vq->index);
			return -1;
		}
		for (i = 0; i < IOTLB_CACHE_SIZE; i++)
			vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}

void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}
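/*
 * Illustrative sketch (not built): expected lifecycle of the per-virtqueue
 * IOTLB.  The call sites named in the comments are assumptions based on how
 * the rest of the library uses these helpers, not a definitive map.
 */
#if 0
	/* At virtqueue setup, and again if the vq is reallocated on another
	 * NUMA node; get_mempolicy() above keeps the pool on the vq's node. */
	if (vhost_user_iotlb_init(dev, vq) < 0)
		return -1; /* pool allocation failed */

	/* On invalidation events, e.g. IOTLB messages or memory table updates. */
	vhost_user_iotlb_flush_all(vq);

	/* When the virtqueue is freed. */
	vhost_user_iotlb_destroy(vq);
#endif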