f8f6b1c174
A mempool consumes 3 memzones (with the default ring mempool driver). The default DPDK configuration allows RTE_MAX_MEMZONE (2560) memzones.

Assuming there are no other memzones, that means we can have a maximum of 853 mempools.

In the vhost library, the IOTLB cache code so far requested one mempool per vq, which means that, at most, the vhost library could request mempools for 426 qps. This limit was recently reached on big systems with a lot of virtio ports (and multiqueue in use).

While the limit on mempool count could be something we fix at the DPDK project level, there is no reason to use mempools for the IOTLB cache:
- the IOTLB cache entries do not need to be DMA-able and are only used by the current process (in a multiprocess context),
- getting/putting objects from/in the mempool is always associated with some other locks, so some level of lock contention is already present.

We can convert to a malloc'd pool with objects put in a free list protected by a spinlock.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
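
For context, the free-list scheme relies on a handful of per-virtqueue fields declared in vhost.h, which is not part of this file. The sketch below is inferred from how iotlb.c uses them: the field names match the code, but the exact types, ordering and comments in vhost.h may differ.

	/* Sketch of the struct vhost_virtqueue fields used by iotlb.c */
	rte_rwlock_t			iotlb_lock;
	rte_rwlock_t			iotlb_pending_lock;
	struct vhost_iotlb_entry	*iotlb_pool;
	TAILQ_HEAD(, vhost_iotlb_entry)	iotlb_list;
	TAILQ_HEAD(, vhost_iotlb_entry)	iotlb_pending_list;
	uint64_t			iotlb_cache_nr;
	rte_spinlock_t			iotlb_free_lock;
	SLIST_HEAD(, vhost_iotlb_entry)	iotlb_free_list;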

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2017 Red Hat, Inc.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;
	SLIST_ENTRY(vhost_iotlb_entry) next_free;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

#define IOTLB_CACHE_SIZE 2048
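
/*
 * IOTLB cache entries are carved out of a single rte_calloc'd array
 * (vq->iotlb_pool) instead of a mempool. Unused entries are kept on a
 * singly-linked free list protected by a spinlock, since both the
 * datapath (on cache misses) and the vhost-user message handler may
 * get/put entries concurrently.
 */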
static struct vhost_iotlb_entry *
vhost_user_iotlb_pool_get(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node;

	rte_spinlock_lock(&vq->iotlb_free_lock);
	node = SLIST_FIRST(&vq->iotlb_free_list);
	if (node != NULL)
		SLIST_REMOVE_HEAD(&vq->iotlb_free_list, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
	return node;
}

static void
vhost_user_iotlb_pool_put(struct vhost_virtqueue *vq,
	struct vhost_iotlb_entry *node)
{
	rte_spinlock_lock(&vq->iotlb_free_lock);
	SLIST_INSERT_HEAD(&vq->iotlb_free_list, node, next_free);
	rte_spinlock_unlock(&vq->iotlb_free_lock);
}

static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);

static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

void
vhost_user_iotlb_pending_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;

	node = vhost_user_iotlb_pool_get(vq);
	if (node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool for vq %"PRIu32" empty, clear entries for pending insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(vq);
		node = vhost_user_iotlb_pool_get(vq);
		if (node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, pending insertion failure\n",
				vq->index);
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next,
				temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		vhost_user_iotlb_pool_put(vq, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
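
/*
 * Evict one randomly chosen entry from the IOTLB cache. Used as a last
 * resort when the entry pool is exhausted, so that an insertion can
 * still make progress.
 */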
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
			break;
		}
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
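
/*
 * Insert a new translation in the IOTLB cache. The list is kept sorted
 * by iova so that lookups and invalidations can stop early. If the pool
 * is empty, room is made by evicting a random cache entry (or by
 * flushing the pending list when the cache itself is empty). Pending
 * requests matching the new entry are removed before the lock is
 * released.
 */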
void
vhost_user_iotlb_cache_insert(struct virtio_net *dev, struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t uaddr,
				uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;

	new_node = vhost_user_iotlb_pool_get(vq);
	if (new_node == NULL) {
		VHOST_LOG_CONFIG(dev->ifname, DEBUG,
			"IOTLB pool vq %"PRIu32" empty, clear entries for cache insertion\n",
			vq->index);
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		new_node = vhost_user_iotlb_pool_get(vq);
		if (new_node == NULL) {
			VHOST_LOG_CONFIG(dev->ifname, ERR,
				"IOTLB pool vq %"PRIu32" still empty, cache insertion failed\n",
				vq->index);
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			vhost_user_iotlb_pool_put(vq, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);

}

void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
					uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			vhost_user_iotlb_pool_put(vq, node);
			vq->iotlb_cache_nr--;
		}
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
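
/*
 * Translate a guest IO virtual address into a host virtual address.
 * Walks the iova-sorted cache and follows contiguous entries so that a
 * request spanning several translations can still be served; *size is
 * shrunk to the length actually mapped when only part of the range is
 * covered. Returns 0 if no (or no permitted) translation exists. The
 * caller is expected to hold the IOTLB lock for reading.
 */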
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
						uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

void
vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(vq);
	vhost_user_iotlb_pending_remove_all(vq);
}
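
/*
 * Allocate the IOTLB entry pool for a virtqueue and seed the free list.
 * The array is allocated on the NUMA node backing the virtqueue when
 * NUMA awareness is compiled in. Calling this again (e.g. on reconnect)
 * flushes the existing entries and reallocates the pool.
 */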
int
vhost_user_iotlb_init(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned int i;
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(vq);
		rte_free(vq->iotlb_pool);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_spinlock_init(&vq->iotlb_free_lock);
	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	SLIST_INIT(&vq->iotlb_free_list);
	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	vq->iotlb_pool = rte_calloc_socket("iotlb", IOTLB_CACHE_SIZE,
		sizeof(struct vhost_iotlb_entry), 0, socket);
	if (!vq->iotlb_pool) {
		VHOST_LOG_CONFIG(dev->ifname, ERR,
			"Failed to create IOTLB cache pool for vq %"PRIu32"\n",
			vq->index);
		return -1;
	}
	for (i = 0; i < IOTLB_CACHE_SIZE; i++)
		vhost_user_iotlb_pool_put(vq, &vq->iotlb_pool[i]);

	vq->iotlb_cache_nr = 0;

	return 0;
}

void
vhost_user_iotlb_destroy(struct vhost_virtqueue *vq)
{
	rte_free(vq->iotlb_pool);
}