diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h index 388a8cb3b2ce..ba592ea08ff1 100644 --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -199,6 +199,8 @@ int iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma); int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry **entry0); +int iommu_gas_reserve_region_extend(struct iommu_domain *domain, + iommu_gaddr_t start, iommu_gaddr_t end); void iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno); bool iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno); diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 841bb3c83da5..431c0c195c35 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -677,6 +677,22 @@ iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, return (0); } +static int +iommu_gas_reserve_region_locked(struct iommu_domain *domain, + iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry) +{ + int error; + + IOMMU_DOMAIN_ASSERT_LOCKED(domain); + + entry->start = start; + entry->end = end; + error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT); + if (error == 0) + entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED; + return (error); +} + int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry **entry0) @@ -685,12 +701,8 @@ iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start, int error; entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); - entry->start = start; - entry->end = end; IOMMU_DOMAIN_LOCK(domain); - error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT); - if (error == 0) - entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED; + error = iommu_gas_reserve_region_locked(domain, start, end, entry); IOMMU_DOMAIN_UNLOCK(domain); if (error != 0) iommu_gas_free_entry(domain, entry); @@ -699,6 +711,56 @@ iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start, return (error); } +/* + * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing + * entries. + */ +int +iommu_gas_reserve_region_extend(struct iommu_domain *domain, + iommu_gaddr_t start, iommu_gaddr_t end) +{ + struct iommu_map_entry *entry, *next, *prev, key = {}; + iommu_gaddr_t entry_start, entry_end; + int error; + + error = 0; + entry = NULL; + end = ummin(end, domain->end); + while (start < end) { + /* Preallocate an entry. */ + if (entry == NULL) + entry = iommu_gas_alloc_entry(domain, + IOMMU_PGF_WAITOK); + /* Calculate the free region from here to the next entry. */ + key.start = key.end = start; + IOMMU_DOMAIN_LOCK(domain); + next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key); + KASSERT(next != NULL, ("domain %p with end %#jx has no entry " + "after %#jx", domain, (uintmax_t)domain->end, + (uintmax_t)start)); + entry_end = ummin(end, next->start); + prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next); + if (prev != NULL) + entry_start = ummax(start, prev->end); + else + entry_start = start; + start = next->end; + /* Reserve the region if non-empty. */ + if (entry_start != entry_end) { + error = iommu_gas_reserve_region_locked(domain, + entry_start, entry_end, entry); + if (error != 0) + break; + entry = NULL; + } + IOMMU_DOMAIN_UNLOCK(domain); + } + /* Release a preallocated entry if it was not used. */ + if (entry != NULL) + iommu_gas_free_entry(domain, entry); + return (error); +} + struct iommu_map_entry * iommu_map_alloc_entry(struct iommu_domain *domain, u_int flags) { diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c index 9d67de127907..110740d6ad01 100644 --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -317,6 +317,66 @@ domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus, return (error); } +/* + * PCI memory address space is shared between memory-mapped devices (MMIO) and + * host memory (which may be remapped by an IOMMU). Device accesses to an + * address within a memory aperture in a PCIe root port will be treated as + * peer-to-peer and not forwarded to an IOMMU. To avoid this, reserve the + * address space of the root port's memory apertures in the address space used + * by the IOMMU for remapping. + */ +static int +dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev) +{ + struct iommu_domain *iodom; + device_t root; + uint32_t val; + uint64_t base, limit; + int error; + + iodom = DOM2IODOM(domain); + + root = pci_find_pcie_root_port(dev); + if (root == NULL) + return (0); + + /* Disable downstream memory */ + base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2)); + limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2)); + error = iommu_gas_reserve_region_extend(iodom, base, limit + 1); + if (bootverbose || error != 0) + device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n", + base, limit + 1, error); + if (error != 0) + return (error); + + /* Disable downstream prefetchable memory */ + val = pci_read_config(root, PCIR_PMBASEL_1, 2); + if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) { + if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { + base = PCI_PPBMEMBASE( + pci_read_config(root, PCIR_PMBASEH_1, 4), + val); + limit = PCI_PPBMEMLIMIT( + pci_read_config(root, PCIR_PMLIMITH_1, 4), + pci_read_config(root, PCIR_PMLIMITL_1, 2)); + } else { + base = PCI_PPBMEMBASE(0, val); + limit = PCI_PPBMEMLIMIT(0, + pci_read_config(root, PCIR_PMLIMITL_1, 2)); + } + error = iommu_gas_reserve_region_extend(iodom, base, + limit + 1); + if (bootverbose || error != 0) + device_printf(dev, "DMAR reserve [%#jx-%#jx] " + "(error %d)\n", base, limit + 1, error); + if (error != 0) + return (error); + } + + return (error); +} + static struct dmar_domain * dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped) { @@ -502,6 +562,8 @@ dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid, error = domain_init_rmrr(domain1, dev, bus, slot, func, dev_domain, dev_busno, dev_path, dev_path_len); + if (error == 0) + error = dmar_reserve_pci_regions(domain1, dev); if (error != 0) { dmar_domain_destroy(domain1); TD_PINNED_ASSERT;