numam-dpdk/lib/vhost/iotlb.c
William Tu f1f6ebc0ea eal: remove sys/queue.h from public headers
Currently there are some public headers that include 'sys/queue.h', which
is not POSIX, but usually provided by the Linux/BSD system library.
(Not in POSIX.1, POSIX.1-2001, or POSIX.1-2008. Present on the BSDs.)
The file is missing on Windows. During the Windows build, DPDK uses a
bundled copy, so building a DPDK library works fine.  But when OVS or other
applications use DPDK as a library, because some DPDK public headers
include 'sys/queue.h', on Windows, it triggers an error due to no such
file.

One solution is to install 'lib/eal/windows/include/sys/queue.h' into the
Windows environment, as proposed in [1]. However, this means DPDK exports the
functionality of 'sys/queue.h' into the environment, which might cause
symbol, macro, and header clashes with other applications.

The patch fixes it by removing the "#include <sys/queue.h>" from
DPDK public headers, so programs including DPDK headers don't depend
on the system to provide 'sys/queue.h'. When these public headers use
macros such as TAILQ_xxx, we replace them with their RTE_-prefixed equivalents.
For Windows, we copy the definitions from <sys/queue.h> to rte_os.h
in Windows EAL. Note that these RTE_ macros are compatible with
<sys/queue.h>, both at the level of API (to use with <sys/queue.h>
macros in C files) and ABI (to avoid breaking it).

Additionally, TAILQ_FOREACH_SAFE is not part of <sys/queue.h>, so
the patch replaces it with RTE_TAILQ_FOREACH_SAFE.

[1] http://mails.dpdk.org/archives/dev/2021-August/216304.html

Suggested-by: Nick Connolly <nick.connolly@mayadata.io>
Suggested-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Dmitry Kozlyuk <dmitry.kozliuk@gmail.com>
Acked-by: Narcisa Vasile <navasile@linux.microsoft.com>
2021-10-01 13:09:43 +02:00

337 lines
7.4 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2017 Red Hat, Inc.
*/
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
#include <rte_tailq.h>
#include "iotlb.h"
#include "vhost.h"
/*
 * One IOTLB record, allocated from a virtqueue's iotlb_pool mempool.
 * The same structure is linked either on vq->iotlb_list (cached
 * translations) or on vq->iotlb_pending_list; entries placed on the
 * pending list only have iova and perm filled in.
 */
struct vhost_iotlb_entry {
/* List linkage for whichever TAILQ the entry is currently on. */
TAILQ_ENTRY(vhost_iotlb_entry) next;
/* Start of the range in the I/O virtual address space. */
uint64_t iova;
/* Address that iova translates to; lookups return uaddr plus an offset. */
uint64_t uaddr;
/* Length of the range starting at iova. */
uint64_t size;
/* Permission bits of the entry, compared as a bit mask. */
uint8_t perm;
};
/*
 * Number of elements requested for each virtqueue's IOTLB mempool in
 * vhost_user_iotlb_init(). Cached and pending entries are both taken
 * from that pool.
 */
#define IOTLB_CACHE_SIZE 2048
/*
 * Forward declaration: vhost_user_iotlb_pending_insert() calls this
 * function before its definition appears in the file.
 */
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);
/*
 * Empty the pending list of a virtqueue.
 *
 * Every entry is unlinked from vq->iotlb_pending_list and handed back to
 * vq->iotlb_pool. The pending-list write lock is held for the whole walk.
 */
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *entry, *next_entry;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	/* next_entry keeps the walk valid while the current entry is unlinked. */
	RTE_TAILQ_FOREACH_SAFE(entry, &vq->iotlb_pending_list, next,
			next_entry) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, entry, next);
		rte_mempool_put(vq->iotlb_pool, entry);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}
/*
 * Tell whether an entry with exactly this iova and perm is already on the
 * virtqueue's pending list.
 *
 * The list is scanned under the pending-list read lock.
 *
 * Returns true when a matching entry exists, false otherwise.
 */
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *entry;
	bool pending = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(entry, &vq->iotlb_pending_list, next) {
		if (entry->iova != iova || entry->perm != perm)
			continue;

		pending = true;
		break;
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return pending;
}
/*
 * Append an (iova, perm) record to the virtqueue's pending list.
 *
 * The entry is taken from vq->iotlb_pool. If the pool has no free element,
 * room is made first: all pending entries are dropped when the pending
 * list is not empty, otherwise one random cache entry is evicted. If the
 * pool is still exhausted after that, an error is logged and nothing is
 * recorded.
 */
void
vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *entry;

	if (rte_mempool_get(vq->iotlb_pool, (void **)&entry) != 0) {
		VHOST_LOG_CONFIG(DEBUG, "IOTLB pool empty, clear entries\n");

		if (TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_cache_random_evict(vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);

		if (rte_mempool_get(vq->iotlb_pool, (void **)&entry) != 0) {
			VHOST_LOG_CONFIG(ERR, "IOTLB pool still empty, failure\n");
			return;
		}
	}

	/* Only iova and perm are set for an entry on the pending list. */
	entry->iova = iova;
	entry->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);
	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, entry, next);
	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}
/*
 * Remove pending entries that fall inside a given range.
 *
 * An entry is removed when its iova lies in [iova, iova + size) and all
 * of its permission bits are included in perm. Removed entries go back
 * to vq->iotlb_pool. The walk runs under the pending-list write lock.
 */
void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *entry, *next_entry;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	RTE_TAILQ_FOREACH_SAFE(entry, &vq->iotlb_pending_list, next,
			next_entry) {
		if (entry->iova >= iova &&
		    entry->iova < iova + size &&
		    (entry->perm & perm) == entry->perm) {
			TAILQ_REMOVE(&vq->iotlb_pending_list, entry, next);
			rte_mempool_put(vq->iotlb_pool, entry);
		}
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}
/*
 * Empty the IOTLB cache list of a virtqueue.
 *
 * Every entry is unlinked from vq->iotlb_list and handed back to
 * vq->iotlb_pool, then the cached-entry counter is reset to zero. The
 * whole operation runs under the cache write lock.
 */
static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *entry, *next_entry;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	/* next_entry keeps the walk valid while the current entry is unlinked. */
	RTE_TAILQ_FOREACH_SAFE(entry, &vq->iotlb_list, next, next_entry) {
		TAILQ_REMOVE(&vq->iotlb_list, entry, next);
		rte_mempool_put(vq->iotlb_pool, entry);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
struct vhost_iotlb_entry *node, *temp_node;
int entry_idx;
rte_rwlock_write_lock(&vq->iotlb_lock);
entry_idx = rte_rand() % vq->iotlb_cache_nr;
RTE_TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
if (!entry_idx) {
TAILQ_REMOVE(&vq->iotlb_list, node, next);
rte_mempool_put(vq->iotlb_pool, node);
vq->iotlb_cache_nr--;
break;
}
entry_idx--;
}
rte_rwlock_write_unlock(&vq->iotlb_lock);
}
/*
 * Add a translation (iova -> uaddr, size, perm) to the virtqueue's IOTLB
 * cache list, which is kept sorted by ascending iova.
 *
 * The entry is taken from vq->iotlb_pool. If the pool has no free element,
 * room is made first: one random cache entry is evicted when the cache is
 * not empty, otherwise all pending entries are dropped. If the pool is
 * still exhausted after that, an error is logged and nothing is inserted.
 *
 * When an entry with the same iova is already cached, the new one is
 * discarded. In every case that reaches the list update, pending entries
 * covered by (iova, size, perm) are removed before the cache lock is
 * released.
 */
void
vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
				uint64_t uaddr, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *cur, *fresh;

	if (rte_mempool_get(vq->iotlb_pool, (void **)&fresh) != 0) {
		VHOST_LOG_CONFIG(DEBUG, "IOTLB pool empty, clear entries\n");

		if (TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(vq);

		if (rte_mempool_get(vq->iotlb_pool, (void **)&fresh) != 0) {
			VHOST_LOG_CONFIG(ERR, "IOTLB pool still empty, failure\n");
			return;
		}
	}

	fresh->iova = iova;
	fresh->uaddr = uaddr;
	fresh->size = size;
	fresh->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	/*
	 * Stop at the first cached entry whose iova is not below the new
	 * one; cur is left NULL when the walk reaches the end of the list.
	 */
	TAILQ_FOREACH(cur, &vq->iotlb_list, next) {
		if (cur->iova >= fresh->iova)
			break;
	}

	if (!cur) {
		/* Every cached entry starts below the new one. */
		TAILQ_INSERT_TAIL(&vq->iotlb_list, fresh, next);
		vq->iotlb_cache_nr++;
	} else if (cur->iova == fresh->iova) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		rte_mempool_put(vq->iotlb_pool, fresh);
	} else {
		TAILQ_INSERT_BEFORE(cur, fresh, next);
		vq->iotlb_cache_nr++;
	}

	/* Done while still holding the cache write lock, as before. */
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
/*
 * Invalidate cached translations that intersect the range described by
 * iova and size.
 *
 * A zero size is ignored. The cache list is walked in order under the
 * cache write lock; the walk stops at the first entry that starts beyond
 * iova + size. Each removed entry is returned to vq->iotlb_pool and the
 * cached-entry counter is decremented.
 *
 * NOTE(review): because the stop test uses a strict '<', an entry that
 * starts exactly at iova + size is not skipped and is removed as well.
 * Confirm whether this is intended.
 */
void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *entry, *next_entry;
	uint64_t range_end;

	if (unlikely(size == 0))
		return;

	range_end = iova + size;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	RTE_TAILQ_FOREACH_SAFE(entry, &vq->iotlb_list, next, next_entry) {
		/* Sorted list */
		if (unlikely(range_end < entry->iova))
			break;

		/* Entry ends at or before the start of the range: keep it. */
		if (iova >= entry->iova + entry->size)
			continue;

		TAILQ_REMOVE(&vq->iotlb_list, entry, next);
		rte_mempool_put(vq->iotlb_pool, entry);
		vq->iotlb_cache_nr--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}
/*
 * Translate iova using the virtqueue's IOTLB cache list.
 *
 * Consecutive cached entries are followed from iova onward until *size
 * bytes are covered, a gap is met, or an entry lacks one of the requested
 * perm bits.
 *
 * On return, *size is lowered to the number of bytes actually covered when
 * that is less than what was requested. The return value is the translated
 * address for the original iova, or 0 when nothing maps it or when an
 * entry on the way did not grant the requested permissions.
 *
 * No lock is taken in this function.
 * NOTE(review): presumably the caller holds vq->iotlb_lock - confirm.
 */
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
				uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *entry;
	uint64_t skip, host_addr = 0, covered = 0;

	/* Nothing requested: *size stays 0 and no address is returned. */
	if (unlikely(*size == 0))
		return 0;

	TAILQ_FOREACH(entry, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < entry->iova))
			break;

		if (iova >= entry->iova + entry->size)
			continue;

		if (unlikely((perm & entry->perm) != perm)) {
			host_addr = 0;
			break;
		}

		skip = iova - entry->iova;

		/* Only the first matching entry defines the returned address. */
		if (host_addr == 0)
			host_addr = entry->uaddr + skip;

		covered += entry->size - skip;

		/* Continue the lookup right after this entry. */
		iova = entry->iova + entry->size;

		if (covered >= *size)
			break;
	}

	/* Only part of the requested chunk is mapped */
	if (unlikely(covered < *size))
		*size = covered;

	return host_addr;
}
/*
 * Drop every IOTLB entry held for the virtqueue: first all cached
 * translations, then all pending entries. Each helper takes its own
 * list lock.
 */
void
vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
{
vhost_user_iotlb_cache_remove_all(vq);
vhost_user_iotlb_pending_remove_all(vq);
}
/*
 * Set up (or reset) the IOTLB state of virtqueue vq_index of a device.
 *
 * If the virtqueue already has a pool, its cached and pending entries are
 * flushed first. The locks and both lists are then (re)initialised, any
 * mempool already registered under the per-process/device/queue name is
 * freed, and a new pool of IOTLB_CACHE_SIZE entries is created.
 *
 * Returns 0 on success, -1 when the mempool cannot be created.
 */
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
	char name[RTE_MEMPOOL_NAMESIZE];
	int numa_node = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(vq);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	/* Query the node of the vq structure itself; use node 0 on failure. */
	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR))
		numa_node = 0;
#endif

	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	snprintf(name, sizeof(name), "iotlb_%u_%d_%d",
			getpid(), dev->vid, vq_index);
	VHOST_LOG_CONFIG(DEBUG, "IOTLB cache name: %s\n", name);

	/* If already created, free it and recreate */
	vq->iotlb_pool = rte_mempool_lookup(name);
	if (vq->iotlb_pool)
		rte_mempool_free(vq->iotlb_pool);

	vq->iotlb_pool = rte_mempool_create(name,
			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
			0, 0, NULL, NULL, NULL, numa_node,
			MEMPOOL_F_NO_CACHE_ALIGN |
			MEMPOOL_F_SP_PUT);
	if (!vq->iotlb_pool) {
		VHOST_LOG_CONFIG(ERR,
				"Failed to create IOTLB cache pool (%s)\n",
				name);
		return -1;
	}

	/* A fresh pool means no cached entries yet. */
	vq->iotlb_cache_nr = 0;

	return 0;
}