amd64: make uiomove_fromphys functional for pages not mapped by the DMAP

Place the code introduced in r268660 into a separate function that can be
called from uiomove_fromphys. Instead of pre-allocating two KVA pages, use
vmem_alloc to allocate them on demand when they are needed. Since the mutex
that serialized access to the pre-allocated pages is gone, the code no longer
blocks if a page fault is taken while physical addresses from outside the
DMAP are in use.

Also introduce a safety catch in PHYS_TO_DMAP and DMAP_TO_PHYS.
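
As a quick illustration (a sketch only, mirroring the vmem_alloc/vmem_free
calls added to pmap.c below, and assuming a context where M_WAITOK sleeps are
allowed), the on-demand KVA allocation that replaces the two pre-allocated
pages looks like this:

	vm_offset_t va;
	int error;

	/* Allocate a page of KVA only when a non-DMAP address is encountered. */
	error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &va);
	KASSERT(error == 0, ("vmem_alloc failed: %d", error));
	/* ... enter a transient mapping at va and copy through it ... */
	vmem_free(kernel_arena, va, PAGE_SIZE);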

Sponsored by: Citrix Systems R&D
Reviewed by:		kib
Differential Revision:	https://reviews.freebsd.org/D947

amd64/amd64/pmap.c:
 - Factor out the code that deals with non-DMAP addresses from pmap_copy_pages
   and place it in pmap_map_io_transient.
 - Change the code to use vmem_alloc instead of a set of pre-allocated
   pages.
 - Use pmap_qenter and don't pin the thread when page faults are possible.

amd64/amd64/uio_machdep.c:
 - Use pmap_map_io_transient to correctly handle physical addresses not
   covered by the DMAP.

amd64/include/pmap.h:
 - Add the prototypes for the new functions.

amd64/include/vmparam.h:
 - Add safety catches to make sure PHYS_TO_DMAP and DMAP_TO_PHYS are only
   used with addresses covered by the DMAP (see the short sketch after these
   notes).
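
As a sketch only (the physical address below is hypothetical, and the checks
fire only on INVARIANTS kernels), this is the kind of misuse the new catches
turn into a panic instead of a silently bogus pointer:

	vm_paddr_t pa = dmaplimit + PAGE_SIZE;	/* not covered by the DMAP */
	void *p = (void *)PHYS_TO_DMAP(pa);	/* KASSERT fires: "physical address
						   ... not covered by the DMAP" */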

Author:	Roger Pau Monné
Date:	2014-10-24 09:48:58 +00:00
Parent:	cd033efc14
Commit:	927dc0e02a
Notes:	svn2git (2020-12-20 02:59:44 +00:00): svn path=/head/; revision=273582

4 changed files with 142 additions and 66 deletions

amd64/amd64/pmap.c

@@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
@@ -402,11 +403,6 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
/* pmap_copy_pages() over non-DMAP */
static struct mtx cpage_lock;
static vm_offset_t cpage_a;
static vm_offset_t cpage_b;
/*
* Crashdump maps.
*/
@@ -1072,10 +1068,6 @@ pmap_init(void)
M_WAITOK | M_ZERO);
for (i = 0; i < pv_npg; i++)
TAILQ_INIT(&pv_table[i].pv_list);
mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
cpage_a = kva_alloc(PAGE_SIZE);
cpage_b = kva_alloc(PAGE_SIZE);
}
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -5056,66 +5048,24 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
vm_offset_t b_offset, int xfersize)
{
void *a_cp, *b_cp;
vm_page_t m_a, m_b;
vm_paddr_t p_a, p_b;
pt_entry_t *pte;
vm_offset_t a_pg_offset, b_pg_offset;
vm_page_t pages[2];
vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
int cnt;
boolean_t pinned;
boolean_t mapped;
/*
* NB: The sequence of updating a page table followed by accesses
* to the corresponding pages used in the !DMAP case is subject to
* the situation described in the "AMD64 Architecture Programmer's
* Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
* Coherency Considerations". Therefore, issuing the INVLPG right
* after modifying the PTE bits is crucial.
*/
pinned = FALSE;
while (xfersize > 0) {
a_pg_offset = a_offset & PAGE_MASK;
m_a = ma[a_offset >> PAGE_SHIFT];
p_a = m_a->phys_addr;
pages[0] = ma[a_offset >> PAGE_SHIFT];
b_pg_offset = b_offset & PAGE_MASK;
m_b = mb[b_offset >> PAGE_SHIFT];
p_b = m_b->phys_addr;
pages[1] = mb[b_offset >> PAGE_SHIFT];
cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
cnt = min(cnt, PAGE_SIZE - b_pg_offset);
if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
mtx_lock(&cpage_lock);
sched_pin();
pinned = TRUE;
pte = vtopte(cpage_a);
*pte = p_a | X86_PG_A | X86_PG_V |
pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
invlpg(cpage_a);
a_cp = (char *)cpage_a + a_pg_offset;
} else {
a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
}
if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
if (!pinned) {
mtx_lock(&cpage_lock);
sched_pin();
pinned = TRUE;
}
pte = vtopte(cpage_b);
*pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
X86_PG_V | pmap_cache_bits(kernel_pmap,
m_b->md.pat_mode, 0);
invlpg(cpage_b);
b_cp = (char *)cpage_b + b_pg_offset;
} else {
b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
}
mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE);
a_cp = (char *)vaddr[0] + a_pg_offset;
b_cp = (char *)vaddr[1] + b_pg_offset;
bcopy(a_cp, b_cp, cnt);
if (__predict_false(pinned)) {
sched_unpin();
mtx_unlock(&cpage_lock);
pinned = FALSE;
}
if (__predict_false(mapped))
pmap_unmap_io_transient(pages, vaddr, 2, FALSE);
a_offset += cnt;
b_offset += cnt;
xfersize -= cnt;
@@ -6901,6 +6851,107 @@ pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num)
*num = idx;
}
/**
* Get the kernel virtual address of a set of physical pages. If there are
* physical addresses not covered by the DMAP perform a transient mapping
* that will be removed when calling pmap_unmap_io_transient.
*
* \param page The pages the caller wishes to obtain the virtual
* address on the kernel memory map.
* \param vaddr On return contains the kernel virtual memory address
* of the pages passed in the page parameter.
* \param count Number of pages passed in.
* \param can_fault TRUE if the thread using the mapped pages can take
* page faults, FALSE otherwise.
*
* \returns TRUE if the caller must call pmap_unmap_io_transient when
* finished or FALSE otherwise.
*
*/
boolean_t
pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
boolean_t can_fault)
{
vm_paddr_t paddr;
boolean_t needs_mapping;
pt_entry_t *pte;
int cache_bits, error, i;
/*
* Allocate any KVA space that we need, this is done in a separate
* loop to prevent calling vmem_alloc while pinned.
*/
needs_mapping = FALSE;
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (__predict_false(paddr >= dmaplimit)) {
error = vmem_alloc(kernel_arena, PAGE_SIZE,
M_BESTFIT | M_WAITOK, &vaddr[i]);
KASSERT(error == 0, ("vmem_alloc failed: %d", error));
needs_mapping = TRUE;
} else {
vaddr[i] = PHYS_TO_DMAP(paddr);
}
}
/* Exit early if everything is covered by the DMAP */
if (!needs_mapping)
return (FALSE);
/*
* NB: The sequence of updating a page table followed by accesses
* to the corresponding pages used in the !DMAP case is subject to
* the situation described in the "AMD64 Architecture Programmer's
* Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
* Coherency Considerations". Therefore, issuing the INVLPG right
* after modifying the PTE bits is crucial.
*/
if (!can_fault)
sched_pin();
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (paddr >= dmaplimit) {
if (can_fault) {
/*
* Slow path, since we can get page faults
* while mappings are active don't pin the
* thread to the CPU and instead add a global
* mapping visible to all CPUs.
*/
pmap_qenter(vaddr[i], &page[i], 1);
} else {
pte = vtopte(vaddr[i]);
cache_bits = pmap_cache_bits(kernel_pmap,
page[i]->md.pat_mode, 0);
pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
cache_bits);
invlpg(vaddr[i]);
}
}
}
return (needs_mapping);
}
void
pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
boolean_t can_fault)
{
vm_paddr_t paddr;
int i;
if (!can_fault)
sched_unpin();
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (paddr >= dmaplimit) {
if (can_fault)
pmap_qremove(vaddr[i], 1);
vmem_free(kernel_arena, vaddr[i], PAGE_SIZE);
}
}
}
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
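
As a usage sketch (not part of the commit) of the interface documented above:
a caller that can take page faults maps a single page, accesses it through
the returned KVA, and unmaps it only if a transient mapping was set up. Here
"m" and "buf" are placeholder names, and the pattern mirrors what
uiomove_fromphys does below:

	vm_page_t pages[1];
	vm_offset_t vaddr[1];
	boolean_t mapped;

	pages[0] = m;
	mapped = pmap_map_io_transient(pages, vaddr, 1, TRUE);
	/* vaddr[0] is a valid KVA for m whether or not m is covered by the DMAP. */
	bcopy((void *)vaddr[0], buf, PAGE_SIZE);
	if (__predict_false(mapped))
		pmap_unmap_io_transient(pages, vaddr, 1, TRUE);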

amd64/amd64/uio_machdep.c

@@ -61,10 +61,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
struct thread *td = curthread;
struct iovec *iov;
void *cp;
vm_offset_t page_offset;
vm_offset_t page_offset, vaddr;
size_t cnt;
int error = 0;
int save = 0;
boolean_t mapped;
KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
("uiomove_fromphys: mode"));
@@ -72,6 +73,7 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
("uiomove_fromphys proc"));
save = td->td_pflags & TDP_DEADLKTREAT;
td->td_pflags |= TDP_DEADLKTREAT;
mapped = FALSE;
while (n > 0 && uio->uio_resid) {
iov = uio->uio_iov;
cnt = iov->iov_len;
@@ -84,8 +86,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
cnt = n;
page_offset = offset & PAGE_MASK;
cnt = min(cnt, PAGE_SIZE - page_offset);
cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
page_offset;
if (uio->uio_segflg != UIO_NOCOPY) {
mapped = pmap_map_io_transient(
&ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE);
cp = (char *)vaddr + page_offset;
}
switch (uio->uio_segflg) {
case UIO_USERSPACE:
maybe_yield();
@@ -105,6 +110,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
case UIO_NOCOPY:
break;
}
if (__predict_false(mapped)) {
pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT],
&vaddr, 1, TRUE);
mapped = FALSE;
}
iov->iov_base = (char *)iov->iov_base + cnt;
iov->iov_len -= cnt;
uio->uio_resid -= cnt;
@@ -113,6 +123,9 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
n -= cnt;
}
out:
if (__predict_false(mapped))
pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
TRUE);
if (save == 0)
td->td_pflags &= ~TDP_DEADLKTREAT;
return (error);

amd64/include/pmap.h

@@ -397,6 +397,8 @@ void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
#endif /* _KERNEL */
#endif /* !LOCORE */

amd64/include/vmparam.h

@@ -175,8 +175,18 @@
#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
#define VM_MIN_ADDRESS (0)
#define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS)
#define DMAP_TO_PHYS(x) ((x) & ~DMAP_MIN_ADDRESS)
#define PHYS_TO_DMAP(x) ({ \
KASSERT((x) < dmaplimit, \
("physical address %#jx not covered by the DMAP", \
(uintmax_t)x)); \
(x) | DMAP_MIN_ADDRESS; })
#define DMAP_TO_PHYS(x) ({ \
KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) && \
(x) >= DMAP_MIN_ADDRESS, \
("virtual address %#jx not covered by the DMAP", \
(uintmax_t)x)); \
(x) & ~DMAP_MIN_ADDRESS; })
/*
* How many physical pages per kmem arena virtual page.