amd64: make uiomove_fromphys functional for pages not mapped by the DMAP
Place the code introduced in r268660 into a separate function that can be
called from uiomove_fromphys. Instead of pre-allocating two KVA pages, use
vmem_alloc to allocate them on demand when needed. This prevents blocking
if a page fault is taken while physical addresses from outside the DMAP
are used, since the lock is now removed.

Also introduce a safety catch in PHYS_TO_DMAP and DMAP_TO_PHYS.

Sponsored by:	Citrix Systems R&D
Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D947

amd64/amd64/pmap.c:
 - Factor out the code to deal with non-DMAP addresses from
   pmap_copy_pages and place it in pmap_map_io_transient.
 - Change the code to use vmem_alloc instead of a set of pre-allocated
   pages.
 - Use pmap_qenter and don't pin the thread if there can be page faults.

amd64/amd64/uio_machdep.c:
 - Use pmap_map_io_transient in order to correctly deal with physical
   addresses not covered by the DMAP.

amd64/include/pmap.h:
 - Add the prototypes for the new functions.

amd64/include/vmparam.h:
 - Add safety catches to make sure PHYS_TO_DMAP and DMAP_TO_PHYS are
   only used with addresses covered by the DMAP.
This commit is contained in:

    parent: cd033efc14
    commit: 927dc0e02a

Notes (svn2git, 2020-12-20 02:59:44 +00:00):
    svn path=/head/; revision=273582
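
The commit adds a bracketed map/access/unmap API. As a quick orientation
before the diff, here is a minimal caller sketch (hypothetical code, not
part of the commit; the pmap_* names match the functions added below,
while copy_page_out, m and buf are made up for illustration):

    static void
    copy_page_out(vm_page_t m, void *buf)
    {
            vm_offset_t va;
            boolean_t mapped;

            /*
             * With can_fault = FALSE the bcopy() below must not
             * page-fault, so pmap_map_io_transient() pins the thread
             * and installs a CPU-private PTE for pages above dmaplimit;
             * for DMAP-covered pages it just returns the direct-map
             * address and reports that no unmap is needed.
             */
            mapped = pmap_map_io_transient(&m, &va, 1, FALSE);
            bcopy((void *)va, buf, PAGE_SIZE);
            if (__predict_false(mapped))
                    pmap_unmap_io_transient(&m, &va, 1, FALSE);
    }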
amd64/amd64/pmap.c

@@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
+#include <sys/vmem.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
@@ -402,11 +403,6 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
     CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
     "Count of saved TLB context on switch");
 
-/* pmap_copy_pages() over non-DMAP */
-static struct mtx cpage_lock;
-static vm_offset_t cpage_a;
-static vm_offset_t cpage_b;
-
 /*
  * Crashdump maps.
  */
@@ -1072,10 +1068,6 @@ pmap_init(void)
             M_WAITOK | M_ZERO);
         for (i = 0; i < pv_npg; i++)
                 TAILQ_INIT(&pv_table[i].pv_list);
-
-        mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
-        cpage_a = kva_alloc(PAGE_SIZE);
-        cpage_b = kva_alloc(PAGE_SIZE);
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -5056,66 +5048,24 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
         void *a_cp, *b_cp;
-        vm_page_t m_a, m_b;
-        vm_paddr_t p_a, p_b;
-        pt_entry_t *pte;
-        vm_offset_t a_pg_offset, b_pg_offset;
+        vm_page_t pages[2];
+        vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
         int cnt;
-        boolean_t pinned;
+        boolean_t mapped;
 
-        /*
-         * NB: The sequence of updating a page table followed by accesses
-         * to the corresponding pages used in the !DMAP case is subject to
-         * the situation described in the "AMD64 Architecture Programmer's
-         * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
-         * Coherency Considerations". Therefore, issuing the INVLPG right
-         * after modifying the PTE bits is crucial.
-         */
-        pinned = FALSE;
         while (xfersize > 0) {
                 a_pg_offset = a_offset & PAGE_MASK;
-                m_a = ma[a_offset >> PAGE_SHIFT];
-                p_a = m_a->phys_addr;
+                pages[0] = ma[a_offset >> PAGE_SHIFT];
                 b_pg_offset = b_offset & PAGE_MASK;
-                m_b = mb[b_offset >> PAGE_SHIFT];
-                p_b = m_b->phys_addr;
+                pages[1] = mb[b_offset >> PAGE_SHIFT];
                 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
                 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
-                if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
-                    p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
-                        mtx_lock(&cpage_lock);
-                        sched_pin();
-                        pinned = TRUE;
-                        pte = vtopte(cpage_a);
-                        *pte = p_a | X86_PG_A | X86_PG_V |
-                            pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
-                        invlpg(cpage_a);
-                        a_cp = (char *)cpage_a + a_pg_offset;
-                } else {
-                        a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
-                }
-                if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
-                    p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
-                        if (!pinned) {
-                                mtx_lock(&cpage_lock);
-                                sched_pin();
-                                pinned = TRUE;
-                        }
-                        pte = vtopte(cpage_b);
-                        *pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
-                            X86_PG_V | pmap_cache_bits(kernel_pmap,
-                            m_b->md.pat_mode, 0);
-                        invlpg(cpage_b);
-                        b_cp = (char *)cpage_b + b_pg_offset;
-                } else {
-                        b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
-                }
+                mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE);
+                a_cp = (char *)vaddr[0] + a_pg_offset;
+                b_cp = (char *)vaddr[1] + b_pg_offset;
                 bcopy(a_cp, b_cp, cnt);
-                if (__predict_false(pinned)) {
-                        sched_unpin();
-                        mtx_unlock(&cpage_lock);
-                        pinned = FALSE;
-                }
+                if (__predict_false(mapped))
+                        pmap_unmap_io_transient(pages, vaddr, 2, FALSE);
                 a_offset += cnt;
                 b_offset += cnt;
                 xfersize -= cnt;
@@ -6901,6 +6851,107 @@ pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num)
         *num = idx;
 }
 
+/**
+ * Get the kernel virtual address of a set of physical pages. If there are
+ * physical addresses not covered by the DMAP perform a transient mapping
+ * that will be removed when calling pmap_unmap_io_transient.
+ *
+ * \param page        The pages the caller wishes to obtain the virtual
+ *                    address on the kernel memory map.
+ * \param vaddr       On return contains the kernel virtual memory address
+ *                    of the pages passed in the page parameter.
+ * \param count       Number of pages passed in.
+ * \param can_fault   TRUE if the thread using the mapped pages can take
+ *                    page faults, FALSE otherwise.
+ *
+ * \returns TRUE if the caller must call pmap_unmap_io_transient when
+ *          finished or FALSE otherwise.
+ *
+ */
+boolean_t
+pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+        vm_paddr_t paddr;
+        boolean_t needs_mapping;
+        pt_entry_t *pte;
+        int cache_bits, error, i;
+
+        /*
+         * Allocate any KVA space that we need, this is done in a separate
+         * loop to prevent calling vmem_alloc while pinned.
+         */
+        needs_mapping = FALSE;
+        for (i = 0; i < count; i++) {
+                paddr = VM_PAGE_TO_PHYS(page[i]);
+                if (__predict_false(paddr >= dmaplimit)) {
+                        error = vmem_alloc(kernel_arena, PAGE_SIZE,
+                            M_BESTFIT | M_WAITOK, &vaddr[i]);
+                        KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+                        needs_mapping = TRUE;
+                } else {
+                        vaddr[i] = PHYS_TO_DMAP(paddr);
+                }
+        }
+
+        /* Exit early if everything is covered by the DMAP */
+        if (!needs_mapping)
+                return (FALSE);
+
+        /*
+         * NB: The sequence of updating a page table followed by accesses
+         * to the corresponding pages used in the !DMAP case is subject to
+         * the situation described in the "AMD64 Architecture Programmer's
+         * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
+         * Coherency Considerations". Therefore, issuing the INVLPG right
+         * after modifying the PTE bits is crucial.
+         */
+        if (!can_fault)
+                sched_pin();
+        for (i = 0; i < count; i++) {
+                paddr = VM_PAGE_TO_PHYS(page[i]);
+                if (paddr >= dmaplimit) {
+                        if (can_fault) {
+                                /*
+                                 * Slow path, since we can get page faults
+                                 * while mappings are active don't pin the
+                                 * thread to the CPU and instead add a global
+                                 * mapping visible to all CPUs.
+                                 */
+                                pmap_qenter(vaddr[i], &page[i], 1);
+                        } else {
+                                pte = vtopte(vaddr[i]);
+                                cache_bits = pmap_cache_bits(kernel_pmap,
+                                    page[i]->md.pat_mode, 0);
+                                pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
+                                    cache_bits);
+                                invlpg(vaddr[i]);
+                        }
+                }
+        }
+
+        return (needs_mapping);
+}
+
+void
+pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+        vm_paddr_t paddr;
+        int i;
+
+        if (!can_fault)
+                sched_unpin();
+        for (i = 0; i < count; i++) {
+                paddr = VM_PAGE_TO_PHYS(page[i]);
+                if (paddr >= dmaplimit) {
+                        if (can_fault)
+                                pmap_qremove(vaddr[i], 1);
+                        vmem_free(kernel_arena, vaddr[i], PAGE_SIZE);
+                }
+        }
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
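
The non-faulting path above leans on the coherency rule quoted in the NB
comment. A distilled sketch of the ordering requirement (not additional
code from the commit; map_one_page_pinned is a hypothetical name):

    /*
     * Per "AMD64 Architecture Programmer's Manual Volume 2", section
     * 7.3.1, the INVLPG must follow the PTE store before any access
     * through the new mapping; otherwise a stale TLB entry may still
     * translate va.  The caller is sched_pin()ned, so flushing only
     * the local TLB is sufficient.
     */
    static void
    map_one_page_pinned(vm_offset_t va, vm_paddr_t paddr, int cache_bits)
    {
            pt_entry_t *pte;

            pte = vtopte(va);       /* KVA from vmem_alloc(), PTE exists */
            pte_store(pte, paddr | X86_PG_RW | X86_PG_V | cache_bits);
            invlpg(va);             /* flush before the first use */
    }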
amd64/amd64/uio_machdep.c

@@ -61,10 +61,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
         struct thread *td = curthread;
         struct iovec *iov;
         void *cp;
-        vm_offset_t page_offset;
+        vm_offset_t page_offset, vaddr;
         size_t cnt;
         int error = 0;
         int save = 0;
+        boolean_t mapped;
 
         KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
             ("uiomove_fromphys: mode"));
@@ -72,6 +73,7 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
             ("uiomove_fromphys proc"));
         save = td->td_pflags & TDP_DEADLKTREAT;
         td->td_pflags |= TDP_DEADLKTREAT;
+        mapped = FALSE;
         while (n > 0 && uio->uio_resid) {
                 iov = uio->uio_iov;
                 cnt = iov->iov_len;
@@ -84,8 +86,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
                         cnt = n;
                 page_offset = offset & PAGE_MASK;
                 cnt = min(cnt, PAGE_SIZE - page_offset);
-                cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
-                    page_offset;
+                if (uio->uio_segflg != UIO_NOCOPY) {
+                        mapped = pmap_map_io_transient(
+                            &ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE);
+                        cp = (char *)vaddr + page_offset;
+                }
                 switch (uio->uio_segflg) {
                 case UIO_USERSPACE:
                         maybe_yield();
@@ -105,6 +110,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
                 case UIO_NOCOPY:
                         break;
                 }
+                if (__predict_false(mapped)) {
+                        pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT],
+                            &vaddr, 1, TRUE);
+                        mapped = FALSE;
+                }
                 iov->iov_base = (char *)iov->iov_base + cnt;
                 iov->iov_len -= cnt;
                 uio->uio_resid -= cnt;
@@ -113,6 +123,9 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
                 n -= cnt;
         }
 out:
+        if (__predict_false(mapped))
+                pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+                    TRUE);
         if (save == 0)
                 td->td_pflags &= ~TDP_DEADLKTREAT;
         return (error);
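
With this change a consumer no longer has to care whether its pages sit
below dmaplimit. A hypothetical fragment (mydev_read_pages and its
arguments are made up for illustration):

    /*
     * Hypothetical driver read path: hand a run of vm_page_t's,
     * possibly located above dmaplimit, straight to a uio.  The
     * transient-mapping dance is now internal to uiomove_fromphys().
     */
    static int
    mydev_read_pages(vm_page_t *pages, vm_offset_t off, int len,
        struct uio *uio)
    {

            return (uiomove_fromphys(pages, off, len, uio));
    }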
amd64/include/pmap.h

@@ -397,6 +397,8 @@ void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
 void    pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
             boolean_t force);
 void    pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
+void    pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
 #endif /* _KERNEL */
 
 #endif /* !LOCORE */
amd64/include/vmparam.h

@@ -175,8 +175,18 @@
 #define VM_MAX_ADDRESS          UPT_MAX_ADDRESS
 #define VM_MIN_ADDRESS          (0)
 
-#define PHYS_TO_DMAP(x)         ((x) | DMAP_MIN_ADDRESS)
-#define DMAP_TO_PHYS(x)         ((x) & ~DMAP_MIN_ADDRESS)
+#define PHYS_TO_DMAP(x) ({                                              \
+        KASSERT((x) < dmaplimit,                                        \
+            ("physical address %#jx not covered by the DMAP",           \
+            (uintmax_t)x));                                             \
+        (x) | DMAP_MIN_ADDRESS; })
+
+#define DMAP_TO_PHYS(x) ({                                              \
+        KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) &&                 \
+            (x) >= DMAP_MIN_ADDRESS,                                    \
+            ("virtual address %#jx not covered by the DMAP",            \
+            (uintmax_t)x));                                             \
+        (x) & ~DMAP_MIN_ADDRESS; })
 
 /*
  * How many physical pages per kmem arena virtual page.
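
The new macros rely on the GCC/Clang statement-expression extension so
they can both assert and still yield a value. A userland analogue for
illustration only (a sketch: assert() stands in for KASSERT(), and the
dmaplimit value here is a made-up constant, not the kernel variable):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DMAP_MIN_ADDRESS 0xfffff80000000000ULL  /* as on amd64 */
    static uint64_t dmaplimit = 1ULL << 32;   /* hypothetical: 4 GiB mapped */

    #define PHYS_TO_DMAP(x) ({                                  \
            assert((x) < dmaplimit);  /* the new safety catch */ \
            (x) | DMAP_MIN_ADDRESS; })

    int
    main(void)
    {
            /* Fine: 0x1000 is below dmaplimit. */
            printf("%#jx\n", (uintmax_t)PHYS_TO_DMAP(0x1000ULL));
            /* PHYS_TO_DMAP(1ULL << 40) would abort here. */
            return (0);
    }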