freebsd-dev/sys/dev/iommu/iommu_gas.c
Doug Moore 87d405eab9 iommu_gas: initialize start_gap as first node
In iommu_gas.c, domain->start_gap points to one of the nodes on either
side of the first free, unallocated range. In iommu_gas_init_domain,
it is initialized to point to the node after the single free
range. Change it to point to the node before that free range, so that,
when 'lowaddr' is within the initial free range, the first allocation
search for free space below 'lowaddr' does not begin and end at an
address above 'lowaddr'. This fixes problems on a machine with Intel
DMAR enabled.

Reported by:	jah
Reviewed by:	dougm
Tested by:	jah
Obtained from:	jah
Fixes:	commit db151ca0c3 iommu_gas: start space search from 1st free space
MFC after:	1 day
2023-02-08 11:04:13 -06:00

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2013 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#define RB_AUGMENT_CHECK(entry) iommu_gas_augment_entry(entry)
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/iommu_gas.h>
#include <dev/iommu/iommu_msi.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>
/*
* Guest Address Space management.
*/
static uma_zone_t iommu_map_entry_zone;
#ifdef INVARIANTS
static int iommu_check_free;
#endif
static void
intel_gas_init(void)
{
iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
sizeof(struct iommu_map_entry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP);
}
SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);
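/*
* Allocate a zeroed map entry from the UMA zone, sleeping only if
* IOMMU_PGF_WAITOK is set. On success, the entry is charged to the
* domain's entry count when a domain is supplied.
*/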
struct iommu_map_entry *
iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags)
{
struct iommu_map_entry *res;
KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0,
("unsupported flags %x", flags));
res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) !=
0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
if (res != NULL && domain != NULL) {
res->domain = domain;
atomic_add_int(&domain->entries_cnt, 1);
}
return (res);
}
void
iommu_gas_free_entry(struct iommu_map_entry *entry)
{
struct iommu_domain *domain;
domain = entry->domain;
if (domain != NULL)
atomic_subtract_int(&domain->entries_cnt, 1);
uma_zfree(iommu_map_entry_zone, entry);
}
static int
iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b)
{
/* The last entry has zero size, so <= */
KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
a, (uintmax_t)a->start, (uintmax_t)a->end));
KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
b, (uintmax_t)b->start, (uintmax_t)b->end));
KASSERT(a->end <= b->start || b->end <= a->start ||
a->end == a->start || b->end == b->start,
("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
a, (uintmax_t)a->start, (uintmax_t)a->end,
b, (uintmax_t)b->start, (uintmax_t)b->end));
if (a->end < b->end)
return (-1);
else if (b->end < a->end)
return (1);
return (0);
}
/*
* Update augmentation data based on data from children.
* Return true if and only if the update changes the augmentation data.
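*
* For each entry, 'first' is the lowest start and 'last' is the highest
* end among the entries of its subtree, and 'free_down' is the largest
* free gap between entries within that subtree. For example, a subtree
* holding only the entries (0, 0x1000) and (0x5000, 0x6000) has a
* free_down of 0x4000 at its root.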
*/
static bool
iommu_gas_augment_entry(struct iommu_map_entry *entry)
{
struct iommu_map_entry *child;
iommu_gaddr_t bound, delta, free_down;
free_down = 0;
bound = entry->start;
if ((child = RB_LEFT(entry, rb_entry)) != NULL) {
free_down = MAX(child->free_down, bound - child->last);
bound = child->first;
}
delta = bound - entry->first;
entry->first = bound;
bound = entry->end;
if ((child = RB_RIGHT(entry, rb_entry)) != NULL) {
free_down = MAX(free_down, child->free_down);
free_down = MAX(free_down, child->first - bound);
bound = child->last;
}
delta += entry->last - bound;
if (delta == 0)
delta = entry->free_down - free_down;
entry->last = bound;
entry->free_down = free_down;
/*
* Return true either if the value of last-first changed,
* or if free_down changed.
*/
return (delta != 0);
}
RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
iommu_gas_cmp_entries);
#ifdef INVARIANTS
static void
iommu_gas_check_free(struct iommu_domain *domain)
{
struct iommu_map_entry *entry, *l, *r;
iommu_gaddr_t v;
RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
KASSERT(domain == entry->domain,
("mismatched free domain %p entry %p entry->domain %p",
domain, entry, entry->domain));
l = RB_LEFT(entry, rb_entry);
r = RB_RIGHT(entry, rb_entry);
v = 0;
if (l != NULL) {
v = MAX(v, l->free_down);
v = MAX(v, entry->start - l->last);
}
if (r != NULL) {
v = MAX(v, r->free_down);
v = MAX(v, r->first - entry->end);
}
MPASS(entry->free_down == v);
}
}
#endif
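/*
* Remove an entry from the tree. If the removal could open a new free
* gap at or below domain->start_gap, move start_gap to a neighbor of
* the removed entry first, so that later searches still begin at the
* lowest free gap.
*/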
static void
iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry)
{
struct iommu_map_entry *nbr;
/* Removing entry may open a new free gap before domain->start_gap. */
if (entry->end <= domain->start_gap->end) {
if (RB_RIGHT(entry, rb_entry) != NULL)
nbr = iommu_gas_entries_tree_RB_NEXT(entry);
else if (RB_LEFT(entry, rb_entry) != NULL)
nbr = RB_LEFT(entry, rb_entry);
else
nbr = RB_PARENT(entry, rb_entry);
domain->start_gap = nbr;
}
RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry);
}
struct iommu_domain *
iommu_get_ctx_domain(struct iommu_ctx *ctx)
{
return (ctx->domain);
}
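/*
* Install the two placeholder entries that bound the domain's address
* space: 'begin' covering [0, IOMMU_PAGE_SIZE), so the first page is
* never allocated, and a zero-length 'end' entry at domain->end.
*/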
void
iommu_gas_init_domain(struct iommu_domain *domain)
{
struct iommu_map_entry *begin, *end;
begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
IOMMU_DOMAIN_LOCK(domain);
KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain));
KASSERT(RB_EMPTY(&domain->rb_root),
("non-empty entries %p", domain));
/*
* The end entry must be inserted first because it has a zero-length gap
* between start and end. Initially, all augmentation data for a new
* entry is zero. Function iommu_gas_augment_entry will compute no
* change in the value of (start-end) and no change in the value of
* free_down, so it will return false to suggest that nothing changed in
* the entry. Thus, inserting the end entry second would prevent the
* augmentation information from being propagated to the begin entry at
* the tree root, which is why the end entry is inserted first.
*/
end->start = domain->end;
end->end = domain->end;
end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, end);
begin->start = 0;
begin->end = IOMMU_PAGE_SIZE;
begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED;
RB_INSERT_PREV(iommu_gas_entries_tree, &domain->rb_root, end, begin);
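/*
* Start the free-space search at the node below the initial free range
* (see the commit message above), so that the first search bounded by
* 'lowaddr' begins at the bottom of the space rather than at 'end'.
*/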
domain->start_gap = begin;
domain->first_place = begin;
domain->last_place = end;
domain->flags |= IOMMU_DOMAIN_GAS_INITED;
IOMMU_DOMAIN_UNLOCK(domain);
}
void
iommu_gas_fini_domain(struct iommu_domain *domain)
{
struct iommu_map_entry *entry;
IOMMU_DOMAIN_ASSERT_LOCKED(domain);
KASSERT(domain->entries_cnt == 2,
("domain still in use %p", domain));
entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root);
KASSERT(entry->start == 0, ("start entry start %p", domain));
KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain));
KASSERT(entry->flags ==
(IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
("start entry flags %p", domain));
iommu_gas_rb_remove(domain, entry);
iommu_gas_free_entry(entry);
entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root);
KASSERT(entry->start == domain->end, ("end entry start %p", domain));
KASSERT(entry->end == domain->end, ("end entry end %p", domain));
KASSERT(entry->flags ==
(IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED),
("end entry flags %p", domain));
iommu_gas_rb_remove(domain, entry);
iommu_gas_free_entry(entry);
}
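/*
* Arguments for a free-space search: the requested size and page
* offset, the bus_dma tag constraints, the allocation flags, and the
* entry to be filled in on success.
*/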
struct iommu_gas_match_args {
iommu_gaddr_t size;
int offset;
const struct bus_dma_tag_common *common;
u_int gas_flags;
struct iommu_map_entry *entry;
};
/*
* The interval [beg, end) is a free interval between two iommu_map_entries.
* Addresses can be allocated only in the range [lbound, ubound]. Try to
* allocate space in the free interval, subject to the conditions expressed by
* a, and return 'true' if and only if the allocation attempt succeeds.
*/
static bool
iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg,
iommu_gaddr_t end, iommu_gaddr_t lbound, iommu_gaddr_t ubound)
{
struct iommu_map_entry *entry;
iommu_gaddr_t first, size, start;
int offset;
/*
* The prev->end is always aligned on the page size, which
* causes page alignment for the entry->start too.
*
* Create IOMMU_PAGE_SIZE gaps before and after the new entry
* to ensure that out-of-bounds accesses fault.
*/
beg = MAX(beg + IOMMU_PAGE_SIZE, lbound);
start = roundup2(beg, a->common->alignment);
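/* A start below beg means roundup2 wrapped around; no space. */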
if (start < beg)
return (false);
end = MIN(end - IOMMU_PAGE_SIZE - 1, ubound);
offset = a->offset;
size = a->size;
if (start + offset + size - 1 > end)
return (false);
/* Check for and try to skip past boundary crossing. */
if (!vm_addr_bound_ok(start + offset, size, a->common->boundary)) {
/*
* The start + offset to start + offset + size region crosses
* the boundary. Check if there is enough space after the next
* boundary after the beg.
*/
first = start;
beg = roundup2(start + offset + 1, a->common->boundary);
start = roundup2(beg, a->common->alignment);
if (start + offset + size - 1 > end ||
!vm_addr_bound_ok(start + offset, size,
a->common->boundary)) {
/*
* Not enough space to align at the requested boundary,
* or the boundary is smaller than the size; splitting
* may still be allowed. We already checked that start +
* size does not overlap ubound.
*
* XXXKIB. It is possible that beg is exactly at the
* start of the next entry, then we do not have a gap.
* Ignore for now.
*/
if ((a->gas_flags & IOMMU_MF_CANSPLIT) == 0)
return (false);
size = beg - first - offset;
start = first;
}
}
entry = a->entry;
entry->start = start;
entry->end = start + roundup2(size + offset, IOMMU_PAGE_SIZE);
entry->flags = IOMMU_MAP_ENTRY_MAP;
return (true);
}
/* Find the next entry that might abut a big-enough range. */
static struct iommu_map_entry *
iommu_gas_next(struct iommu_map_entry *curr, iommu_gaddr_t min_free)
{
struct iommu_map_entry *next;
if ((next = RB_RIGHT(curr, rb_entry)) != NULL &&
next->free_down >= min_free) {
/* Find next entry in right subtree. */
do
curr = next;
while ((next = RB_LEFT(curr, rb_entry)) != NULL &&
next->free_down >= min_free);
} else {
/* Find next entry in a left-parent ancestor. */
while ((next = RB_PARENT(curr, rb_entry)) != NULL &&
curr == RB_RIGHT(next, rb_entry))
curr = next;
curr = next;
}
return (curr);
}
/*
* Address-ordered first-fit search of 'domain' for free space satisfying the
* conditions of 'a'. The space allocated is at least one page big, and is
* bounded by guard pages to the left and right. The allocated space for
* 'domain' is described by an rb-tree of map entries at domain->rb_root, and
* domain->start_gap points to a map entry less than or adjacent to the first
* free-space of size at least 3 pages.
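*
* The search is made in two passes: first over the region below the
* tag's lowaddr, then, if that fails, over the region above highaddr.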
*/
static int
iommu_gas_find_space(struct iommu_domain *domain,
struct iommu_gas_match_args *a)
{
struct iommu_map_entry *curr, *first;
iommu_gaddr_t addr, min_free;
IOMMU_DOMAIN_ASSERT_LOCKED(domain);
KASSERT(a->entry->flags == 0,
("dirty entry %p %p", domain, a->entry));
/*
* start_gap may point to an entry adjacent to gaps too small for any
* new allocation. In that case, advance start_gap to the first free
* space big enough for a minimum allocation plus two guard pages.
*/
min_free = 3 * IOMMU_PAGE_SIZE;
first = domain->start_gap;
while (first != NULL && first->free_down < min_free)
first = RB_PARENT(first, rb_entry);
for (curr = first; curr != NULL;
curr = iommu_gas_next(curr, min_free)) {
if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
first->last + min_free <= curr->start)
break;
if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
curr->end + min_free <= first->first)
break;
}
domain->start_gap = curr;
/*
* If the subtree doesn't have free space for the requested allocation
* plus two guard pages, skip it.
*/
min_free = 2 * IOMMU_PAGE_SIZE +
roundup2(a->size + a->offset, IOMMU_PAGE_SIZE);
/* Climb to find a node in the subtree of big-enough ranges. */
first = curr;
while (first != NULL && first->free_down < min_free)
first = RB_PARENT(first, rb_entry);
/*
* Walk the tree of big-enough ranges until one satisfies the alignment
* requirements or violates the lowaddr address limit.
*/
addr = a->common->lowaddr;
for (curr = first; curr != NULL;
curr = iommu_gas_next(curr, min_free)) {
if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
iommu_gas_match_one(a, first->last, curr->start,
0, addr)) {
RB_INSERT_PREV(iommu_gas_entries_tree,
&domain->rb_root, curr, a->entry);
return (0);
}
if (curr->end >= addr) {
/* All remaining ranges > addr */
break;
}
if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
iommu_gas_match_one(a, curr->end, first->first,
0, addr)) {
RB_INSERT_NEXT(iommu_gas_entries_tree,
&domain->rb_root, curr, a->entry);
return (0);
}
}
/*
* To resume the search at the start of the upper region, first climb to
* the nearest ancestor that spans highaddr. Then find the last entry
* before highaddr that could abut a big-enough range.
*/
addr = a->common->highaddr;
while (curr != NULL && curr->last < addr)
curr = RB_PARENT(curr, rb_entry);
first = NULL;
while (curr != NULL && curr->free_down >= min_free) {
if (addr < curr->end)
curr = RB_LEFT(curr, rb_entry);
else {
first = curr;
curr = RB_RIGHT(curr, rb_entry);
}
}
/*
* Walk the remaining big-enough ranges until one satisfies alignment
* requirements.
*/
for (curr = first; curr != NULL;
curr = iommu_gas_next(curr, min_free)) {
if ((first = RB_LEFT(curr, rb_entry)) != NULL &&
iommu_gas_match_one(a, first->last, curr->start,
addr + 1, domain->end - 1)) {
RB_INSERT_PREV(iommu_gas_entries_tree,
&domain->rb_root, curr, a->entry);
return (0);
}
if ((first = RB_RIGHT(curr, rb_entry)) != NULL &&
iommu_gas_match_one(a, curr->end, first->first,
addr + 1, domain->end - 1)) {
RB_INSERT_NEXT(iommu_gas_entries_tree,
&domain->rb_root, curr, a->entry);
return (0);
}
}
return (ENOMEM);
}
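/*
* Reserve the caller-specified range [entry->start, entry->end).
* Overlapping placeholder entries are removed; overlaps with real
* entries are tolerated only when both the request and the existing
* entry are RMRR, in which case the new entry is clipped to fit.
*/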
static int
iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
u_int flags)
{
struct iommu_map_entry *next, *prev;
IOMMU_DOMAIN_ASSERT_LOCKED(domain);
if ((entry->start & IOMMU_PAGE_MASK) != 0 ||
(entry->end & IOMMU_PAGE_MASK) != 0)
return (EINVAL);
if (entry->start >= entry->end)
return (EINVAL);
if (entry->end >= domain->end)
return (EINVAL);
next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry);
KASSERT(next != NULL, ("next must be non-null %p %jx", domain,
(uintmax_t)entry->start));
prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
/* prev could be NULL */
/*
* Adapt to broken BIOSes which specify overlapping RMRR
* entries.
*
* XXXKIB: this does not handle a case when prev or next
* entries are completely covered by the current one, which
* extends both ways.
*/
if (prev != NULL && prev->end > entry->start &&
(prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
if ((flags & IOMMU_MF_RMRR) == 0 ||
(prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
return (EBUSY);
entry->start = prev->end;
}
if (next->start < entry->end &&
(next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) {
if ((flags & IOMMU_MF_RMRR) == 0 ||
(next->flags & IOMMU_MAP_ENTRY_RMRR) == 0)
return (EBUSY);
entry->end = next->start;
}
if (entry->end == entry->start)
return (0);
if (prev != NULL && prev->end > entry->start) {
/* This assumes that prev is the placeholder entry. */
iommu_gas_rb_remove(domain, prev);
prev = NULL;
}
RB_INSERT_PREV(iommu_gas_entries_tree,
&domain->rb_root, next, entry);
if (next->start < entry->end) {
iommu_gas_rb_remove(domain, next);
next = NULL;
}
if ((flags & IOMMU_MF_RMRR) != 0)
entry->flags = IOMMU_MAP_ENTRY_RMRR;
#ifdef INVARIANTS
struct iommu_map_entry *ip, *in;
ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry);
in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry);
KASSERT(prev == NULL || ip == prev,
("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
entry, entry->start, entry->end, prev,
prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
KASSERT(next == NULL || in == next,
("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
entry, entry->start, entry->end, next,
next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
#endif
return (0);
}
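/*
* Give the address space held by a regular mapping entry back to the
* allocator: remove it from the tree and clear the MAP flag. The entry
* itself is not freed here.
*/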
void
iommu_gas_free_space(struct iommu_map_entry *entry)
{
struct iommu_domain *domain;
domain = entry->domain;
KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP,
("permanent entry %p %p", domain, entry));
IOMMU_DOMAIN_LOCK(domain);
iommu_gas_rb_remove(domain, entry);
entry->flags &= ~IOMMU_MAP_ENTRY_MAP;
#ifdef INVARIANTS
if (iommu_check_free)
iommu_gas_check_free(domain);
#endif
IOMMU_DOMAIN_UNLOCK(domain);
}
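/*
* Release an RMRR reservation. The entry is kept in the tree if it
* doubles as the domain's first or last placeholder.
*/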
void
iommu_gas_free_region(struct iommu_map_entry *entry)
{
struct iommu_domain *domain;
domain = entry->domain;
KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR |
IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR,
("non-RMRR entry %p %p", domain, entry));
IOMMU_DOMAIN_LOCK(domain);
if (entry != domain->first_place &&
entry != domain->last_place)
iommu_gas_rb_remove(domain, entry);
entry->flags &= ~IOMMU_MAP_ENTRY_RMRR;
IOMMU_DOMAIN_UNLOCK(domain);
}
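/*
* Find the entry that contains 'start' or the first entry after it.
* If a non-RMRR entry straddles 'start', split it, using the
* preallocated entry *r for the upper part, and return the entry that
* begins at 'start'.
*/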
static struct iommu_map_entry *
iommu_gas_remove_clip_left(struct iommu_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t end, struct iommu_map_entry **r)
{
struct iommu_map_entry *entry, *res, fentry;
IOMMU_DOMAIN_ASSERT_LOCKED(domain);
MPASS(start <= end);
MPASS(end <= domain->end);
/*
* Find the entry that contains the supplied guest address
* 'start', or the first entry after it. Since we asserted
* that start is below the domain end, such an entry should
* exist. Then clip it if needed.
*/
fentry.start = start + 1;
fentry.end = start + 1;
entry = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &fentry);
if (entry->start >= start ||
(entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
return (entry);
res = *r;
*r = NULL;
*res = *entry;
res->start = entry->end = start;
RB_UPDATE_AUGMENT(entry, rb_entry);
RB_INSERT_NEXT(iommu_gas_entries_tree,
&domain->rb_root, entry, res);
return (res);
}
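/*
* If a non-RMRR entry straddles 'end', split it so that 'r' takes the
* part below 'end'; return true if and only if a split was made.
*/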
static bool
iommu_gas_remove_clip_right(struct iommu_domain *domain,
iommu_gaddr_t end, struct iommu_map_entry *entry,
struct iommu_map_entry *r)
{
if (entry->start >= end || (entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
return (false);
*r = *entry;
r->end = entry->start = end;
RB_UPDATE_AUGMENT(entry, rb_entry);
RB_INSERT_PREV(iommu_gas_entries_tree,
&domain->rb_root, entry, r);
return (true);
}
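/*
* Queue an entry for unmapping and eventual reclamation, unless it is
* already unmapped or already queued.
*/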
static void
iommu_gas_remove_unmap(struct iommu_domain *domain,
struct iommu_map_entry *entry, struct iommu_map_entries_tailq *gcp)
{
IOMMU_DOMAIN_ASSERT_LOCKED(domain);
if ((entry->flags & (IOMMU_MAP_ENTRY_UNMAPPED |
IOMMU_MAP_ENTRY_REMOVING)) != 0)
return;
MPASS((entry->flags & IOMMU_MAP_ENTRY_PLACE) == 0);
entry->flags |= IOMMU_MAP_ENTRY_REMOVING;
TAILQ_INSERT_TAIL(gcp, entry, dmamap_link);
}
/*
* Remove the specified range from the GAS of the domain. Note that the
* removal is not guaranteed to occur upon the function return; it
* might be finalized some time later, when the hardware reports that
* (queued) IOTLB invalidation was performed.
*/
void
iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t size)
{
struct iommu_map_entry *entry, *nentry, *r1, *r2;
struct iommu_map_entries_tailq gc;
iommu_gaddr_t end;
end = start + size;
r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
TAILQ_INIT(&gc);
IOMMU_DOMAIN_LOCK(domain);
nentry = iommu_gas_remove_clip_left(domain, start, end, &r1);
RB_FOREACH_FROM(entry, iommu_gas_entries_tree, nentry) {
if (entry->start >= end)
break;
KASSERT(start <= entry->start,
("iommu_gas_remove entry (%#jx, %#jx) start %#jx",
entry->start, entry->end, start));
if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
continue;
iommu_gas_remove_unmap(domain, entry, &gc);
}
if (iommu_gas_remove_clip_right(domain, end, entry, r2)) {
iommu_gas_remove_unmap(domain, r2, &gc);
r2 = NULL;
}
#ifdef INVARIANTS
RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) {
if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
continue;
KASSERT(entry->end <= start || entry->start >= end,
("iommu_gas_remove leftover entry (%#jx, %#jx) range "
"(%#jx, %#jx)",
entry->start, entry->end, start, end));
}
#endif
IOMMU_DOMAIN_UNLOCK(domain);
if (r1 != NULL)
iommu_gas_free_entry(r1);
if (r2 != NULL)
iommu_gas_free_entry(r2);
iommu_domain_unload(domain, &gc, true);
}
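/*
* Allocate address space that satisfies the tag's constraints, then
* create the mapping for the pages in 'ma'. On success, *res points to
* the new entry.
*/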
int
iommu_gas_map(struct iommu_domain *domain,
const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res)
{
struct iommu_gas_match_args a;
struct iommu_map_entry *entry;
int error;
KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0,
("invalid flags 0x%x", flags));
a.size = size;
a.offset = offset;
a.common = common;
a.gas_flags = flags;
entry = iommu_gas_alloc_entry(domain,
(flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0);
if (entry == NULL)
return (ENOMEM);
a.entry = entry;
IOMMU_DOMAIN_LOCK(domain);
error = iommu_gas_find_space(domain, &a);
if (error == ENOMEM) {
IOMMU_DOMAIN_UNLOCK(domain);
iommu_gas_free_entry(entry);
return (error);
}
#ifdef INVARIANTS
if (iommu_check_free)
iommu_gas_check_free(domain);
#endif
KASSERT(error == 0,
("unexpected error %d from iommu_gas_find_entry", error));
KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx",
(uintmax_t)entry->end, (uintmax_t)domain->end));
entry->flags |= eflags;
IOMMU_DOMAIN_UNLOCK(domain);
error = domain->ops->map(domain, entry->start,
entry->end - entry->start, ma, eflags,
((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
iommu_domain_unload_entry(entry, true,
(flags & IOMMU_MF_CANWAIT) != 0);
return (error);
}
KASSERT(error == 0,
("unexpected error %d from domain_map_buf", error));
*res = entry;
return (0);
}
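/*
* Map a region whose guest address range was fixed by the caller
* through entry->start and entry->end, typically an RMRR region.
*/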
int
iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry,
u_int eflags, u_int flags, vm_page_t *ma)
{
iommu_gaddr_t start;
int error;
KASSERT(entry->domain == domain,
("mismatched domain %p entry %p entry->domain %p", domain,
entry, entry->domain));
KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain,
entry, entry->flags));
KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0,
("invalid flags 0x%x", flags));
start = entry->start;
IOMMU_DOMAIN_LOCK(domain);
error = iommu_gas_alloc_region(domain, entry, flags);
if (error != 0) {
IOMMU_DOMAIN_UNLOCK(domain);
return (error);
}
entry->flags |= eflags;
IOMMU_DOMAIN_UNLOCK(domain);
if (entry->end == entry->start)
return (0);
error = domain->ops->map(domain, entry->start,
entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start),
eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0));
if (error == ENOMEM) {
iommu_domain_unload_entry(entry, false,
(flags & IOMMU_MF_CANWAIT) != 0);
return (error);
}
KASSERT(error == 0,
("unexpected error %d from domain_map_buf", error));
return (0);
}
static int
iommu_gas_reserve_region_locked(struct iommu_domain *domain,
iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry)
{
int error;
IOMMU_DOMAIN_ASSERT_LOCKED(domain);
entry->start = start;
entry->end = end;
error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT);
if (error == 0)
entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED;
return (error);
}
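/*
* Reserve [start, end) so that the allocator never hands it out. On
* success, the backing entry is returned in *entry0 if requested.
*/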
int
iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t end, struct iommu_map_entry **entry0)
{
struct iommu_map_entry *entry;
int error;
entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK);
IOMMU_DOMAIN_LOCK(domain);
error = iommu_gas_reserve_region_locked(domain, start, end, entry);
IOMMU_DOMAIN_UNLOCK(domain);
if (error != 0)
iommu_gas_free_entry(entry);
else if (entry0 != NULL)
*entry0 = entry;
return (error);
}
/*
* As in iommu_gas_reserve_region, reserve [start, end), but allow for existing
* entries.
*/
int
iommu_gas_reserve_region_extend(struct iommu_domain *domain,
iommu_gaddr_t start, iommu_gaddr_t end)
{
struct iommu_map_entry *entry, *next, *prev, key = {};
iommu_gaddr_t entry_start, entry_end;
int error;
error = 0;
entry = NULL;
end = ummin(end, domain->end);
while (start < end) {
/* Preallocate an entry. */
if (entry == NULL)
entry = iommu_gas_alloc_entry(domain,
IOMMU_PGF_WAITOK);
/* Calculate the free region from here to the next entry. */
key.start = key.end = start;
IOMMU_DOMAIN_LOCK(domain);
next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key);
KASSERT(next != NULL, ("domain %p with end %#jx has no entry "
"after %#jx", domain, (uintmax_t)domain->end,
(uintmax_t)start));
entry_end = ummin(end, next->start);
prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next);
if (prev != NULL)
entry_start = ummax(start, prev->end);
else
entry_start = start;
start = next->end;
/* Reserve the region if non-empty. */
if (entry_start != entry_end) {
error = iommu_gas_reserve_region_locked(domain,
entry_start, entry_end, entry);
if (error != 0) {
IOMMU_DOMAIN_UNLOCK(domain);
break;
}
entry = NULL;
}
IOMMU_DOMAIN_UNLOCK(domain);
}
/* Release a preallocated entry if it was not used. */
if (entry != NULL)
iommu_gas_free_entry(entry);
return (error);
}
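/*
* Tear down the domain's MSI page mapping, if one exists, and release
* its entry.
*/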
void
iommu_unmap_msi(struct iommu_ctx *ctx)
{
struct iommu_map_entry *entry;
struct iommu_domain *domain;
domain = ctx->domain;
entry = domain->msi_entry;
if (entry == NULL)
return;
domain->ops->unmap(domain, entry->start, entry->end -
entry->start, IOMMU_PGF_WAITOK);
iommu_gas_free_space(entry);
iommu_gas_free_entry(entry);
domain->msi_entry = NULL;
domain->msi_base = 0;
domain->msi_phys = 0;
}
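/*
* Ensure that the domain has a mapped MSI page. Concurrent callers may
* race to create it; the losing allocation is freed and success is
* returned as long as a valid MSI page exists.
*/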
int
iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset,
u_int eflags, u_int flags, vm_page_t *ma)
{
struct iommu_domain *domain;
struct iommu_map_entry *entry;
int error;
error = 0;
domain = ctx->domain;
/* Check if there is already an MSI page allocated */
IOMMU_DOMAIN_LOCK(domain);
entry = domain->msi_entry;
IOMMU_DOMAIN_UNLOCK(domain);
if (entry == NULL) {
error = iommu_gas_map(domain, &ctx->tag->common, size, offset,
eflags, flags, ma, &entry);
IOMMU_DOMAIN_LOCK(domain);
if (error == 0) {
if (domain->msi_entry == NULL) {
MPASS(domain->msi_base == 0);
MPASS(domain->msi_phys == 0);
domain->msi_entry = entry;
domain->msi_base = entry->start;
domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]);
} else {
/*
* We lost the race and already have an
* MSI page allocated. Free the unneeded entry.
*/
iommu_gas_free_entry(entry);
}
} else if (domain->msi_entry != NULL) {
/*
* The allocation failed, but another succeeded.
* Return success as there is a valid MSI page.
*/
error = 0;
}
IOMMU_DOMAIN_UNLOCK(domain);
}
return (error);
}
void
iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr)
{
*addr = (*addr - domain->msi_phys) + domain->msi_base;
KASSERT(*addr >= domain->msi_entry->start,
("%s: Address is below the MSI entry start address (%jx < %jx)",
__func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start));
KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end,
("%s: Address is above the MSI entry end address (%jx < %jx)",
__func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end));
}
SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "");
#ifdef INVARIANTS
SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN,
&iommu_check_free, 0,
"Check the GPA RBtree for free_down and free_after validity");
#endif