vfio: fix expanding DMA area in ppc64le

On ppc64le, expanding the DMA area always fails because the existing DMA
window cannot be removed. As a result, no more than one memseg can be
allocated on ppc64le. The root cause is that vfio_spapr_dma_mem_map() does
not unmap all mapped DMA regions before removing the window. This patch
fixes that incorrect behavior.
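
To make the intended flow concrete, here is a minimal sketch (not the patch
itself) of what expanding the window now requires. unmap_all_memsegs() and
remap_all_memsegs() are hypothetical stand-ins for the memseg/user-map walks
added in the diff below, and the page_shift value is an assumption;
vfio_spapr_create_new_dma_window() and rte_align64pow2() are the existing
helpers used in the patch.

#include <stdint.h>
#include <linux/vfio.h>
#include <rte_common.h>

/* hypothetical stand-ins for the unmap/remap walks the patch adds */
int unmap_all_memsegs(int vfio_container_fd);
int remap_all_memsegs(int vfio_container_fd);
/* existing eal_vfio.c helper: removes the old window, creates a new one */
int vfio_spapr_create_new_dma_window(int vfio_container_fd,
		struct vfio_iommu_spapr_tce_create *create);

/* Corrected expansion flow: every mapping must be released before the sPAPR
 * DMA window can be removed and re-created with a larger size. */
static int
expand_dma_window(int vfio_container_fd, uint64_t iova, uint64_t len)
{
	struct vfio_iommu_spapr_tce_create create = {
		.argsz = sizeof(create),
		.page_shift = 24,	/* assumption: 16 MB huge pages */
		.window_size = rte_align64pow2(iova + len),
		.levels = 1,
	};

	/* the kernel refuses to drop the old window while mappings exist */
	if (unmap_all_memsegs(vfio_container_fd) < 0)
		return -1;
	/* remove the old window and create a larger one */
	if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0)
		return -1;
	/* re-establish the mappings inside the new window */
	return remap_all_memsegs(vfio_container_fd);
}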

I also fixed the order of the unmap and unregister ioctls: the unregister
ioctl sometimes reports a device-busy error while the area is still mapped,
so the mapping must be removed first.
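
For illustration, a sketch (a hypothetical helper, not the patched function
verbatim) of the tear-down order the patch enforces, using the kernel VFIO
ioctls that appear in the diff:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Unmap the DMA area first, then unregister the backing memory.
 * Unregistering while the mapping still exists is what triggered the
 * busy errors mentioned above. */
static int
spapr_unmap_then_unregister(int vfio_container_fd, uint64_t vaddr,
		uint64_t iova, uint64_t len)
{
	struct vfio_iommu_type1_dma_unmap dma_unmap = {
		.argsz = sizeof(dma_unmap),
		.size = len,
		.iova = iova,
	};
	struct vfio_iommu_spapr_register_memory reg = {
		.argsz = sizeof(reg),
		.vaddr = vaddr,
		.size = len,
	};

	/* 1. drop the IOMMU mapping */
	if (ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap))
		return -1;
	/* 2. only now unregister the backing memory */
	if (ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg))
		return -1;
	return 0;
}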

Signed-off-by: Takeshi Yoshimura <tyos@jp.ibm.com>
Acked-by: David Christensen <drc@linux.vnet.ibm.com>
Author: Takeshi Yoshimura, 2019-07-12 18:15:32 -07:00
Committed by: Thomas Monjalon
parent 9d10f53e40
commit e072d16f89


@@ -1354,14 +1354,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		}
 	} else {
-		ret = ioctl(vfio_container_fd,
-				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
-		if (ret) {
-			RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
-					errno, strerror(errno));
-			return -1;
-		}
-
 		memset(&dma_unmap, 0, sizeof(dma_unmap));
 		dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
 		dma_unmap.size = len;
@@ -1374,28 +1366,56 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 					errno, strerror(errno));
 			return -1;
 		}
+
+		ret = ioctl(vfio_container_fd,
+				VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
+		if (ret) {
+			RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n",
+					errno, strerror(errno));
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
+struct spapr_remap_walk_param {
+	int vfio_container_fd;
+	uint64_t addr_64;
+};
+
 static int
 vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
-	int *vfio_container_fd = arg;
+	struct spapr_remap_walk_param *param = arg;
 
-	if (msl->external)
+	if (msl->external || ms->addr_64 == param->addr_64)
 		return 0;
 
-	return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
+	return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
 
+static int
+vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct spapr_remap_walk_param *param = arg;
+
+	if (msl->external || ms->addr_64 == param->addr_64)
+		return 0;
+
+	return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
+			ms->len, 0);
+}
+
 struct spapr_walk_param {
 	uint64_t window_size;
 	uint64_t hugepage_sz;
+	uint64_t addr_64;
 };
 
 static int
 vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
@@ -1406,6 +1426,10 @@ vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 	if (msl->external)
 		return 0;
 
+	/* do not iterate ms we haven't mapped yet */
+	if (param->addr_64 && ms->addr_64 == param->addr_64)
+		return 0;
+
 	if (max > param->window_size) {
 		param->hugepage_sz = ms->hugepage_sz;
 		param->window_size = max;
@@ -1503,6 +1527,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	/* check if window size needs to be adjusted */
 	memset(&param, 0, sizeof(param));
+	param.addr_64 = vaddr;
 
 	/* we're inside a callback so use thread-unsafe version */
 	if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
@@ -1516,7 +1541,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 		for (i = 0; i < user_mem_maps->n_maps; i++) {
 			uint64_t max = user_mem_maps->maps[i].iova +
 				user_mem_maps->maps[i].len;
-			create.window_size = RTE_MAX(create.window_size, max);
+			param.window_size = RTE_MAX(param.window_size, max);
 		}
 
 		/* sPAPR requires window size to be a power of 2 */
@@ -1525,9 +1550,33 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 	create.levels = 1;
 
 	if (do_map) {
-		void *addr;
 		/* re-create window and remap the entire memory */
-		if (iova > create.window_size) {
+		if (iova + len > create.window_size) {
+			struct spapr_remap_walk_param remap_param = {
+				.vfio_container_fd = vfio_container_fd,
+				.addr_64 = vaddr,
+			};
+
+			/* release all maps before recreating the window */
+			if (rte_memseg_walk_thread_unsafe(vfio_spapr_unmap_walk,
+					&remap_param) < 0) {
+				RTE_LOG(ERR, EAL, "Could not release DMA maps\n");
+				ret = -1;
+				goto out;
+			}
+
+			/* release all user maps */
+			for (i = 0; i < user_mem_maps->n_maps; i++) {
+				struct user_mem_map *map =
+						&user_mem_maps->maps[i];
+				if (vfio_spapr_dma_do_map(vfio_container_fd,
+						map->addr, map->iova, map->len,
+						0)) {
+					RTE_LOG(ERR, EAL, "Could not release user DMA maps\n");
+					ret = -1;
+					goto out;
+				}
+			}
+			create.window_size = rte_align64pow2(iova + len);
 			if (vfio_spapr_create_new_dma_window(vfio_container_fd,
 					&create) < 0) {
 				RTE_LOG(ERR, EAL, "Could not create new DMA window\n");
@@ -1537,7 +1586,7 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 			/* we're inside a callback, so use thread-unsafe version
 			 */
 			if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
-					&vfio_container_fd) < 0) {
+					&remap_param) < 0) {
 				RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
 				ret = -1;
 				goto out;
@@ -1555,23 +1604,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 				}
 			}
 		}
 
-		/* now that we've remapped all of the memory that was present
-		 * before, map the segment that we were requested to map.
-		 *
-		 * however, if we were called by the callback, the memory we
-		 * were called with was already in the memseg list, so previous
-		 * mapping should've mapped that segment already.
-		 *
-		 * virt2memseg_list is a relatively cheap check, so use that. if
-		 * memory is within any memseg list, it's a memseg, so it's
-		 * already mapped.
-		 */
-		addr = (void *)(uintptr_t)vaddr;
-		if (rte_mem_virt2memseg_list(addr) == NULL &&
-				vfio_spapr_dma_do_map(vfio_container_fd,
-					vaddr, iova, len, 1) < 0) {
-			RTE_LOG(ERR, EAL, "Could not map segment\n");
+		if (vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 1)) {
+			RTE_LOG(ERR, EAL, "Failed to map DMA\n");
 			ret = -1;
 			goto out;
 		}
@@ -1599,6 +1633,7 @@ vfio_spapr_dma_map(int vfio_container_fd)
 	struct spapr_walk_param param;
 
 	memset(&param, 0, sizeof(param));
+	param.addr_64 = 0UL;
 
 	/* create DMA window from 0 to max(phys_addr + len) */
 	rte_memseg_walk(vfio_spapr_window_size_walk, &param);