Use PCID to optimize PTI.

Use PCID to avoid a complete TLB shootdown when switching between
user and kernel mode with PTI enabled.

The model is close to what I read about KAISER: the user-mode PCID
has a 1:1 correspondence to the kernel-mode PCID, obtained by setting
bit 11 in the PCID.  A full kernel-mode TLB shootdown is performed on
context switches, since KVA TLB invalidation only works in the
current pmap.  The user-mode part of the TLB is flushed on pmap
activations as well.
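
For illustration, a minimal userland sketch of the pairing (a sketch
only: PMAP_PCID_USER_PT mirrors the definition added to pmap.h below,
and the example PCID value is made up):

    #include <stdint.h>
    #include <stdio.h>

    #define PMAP_PCID_USER_PT 0x800  /* bit 11: user page-table PCID */

    /* Derive the paired user-mode PCID from a kernel-mode PCID. */
    static uint32_t
    user_pcid(uint32_t kern_pcid)
    {
            return (kern_pcid | PMAP_PCID_USER_PT);
    }

    int
    main(void)
    {
            uint32_t kern_pcid = 0x123;  /* arbitrary example PCID */

            printf("kernel PCID %#x pairs with user PCID %#x\n",
                kern_pcid, user_pcid(kern_pcid));
            return (0);
    }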

Similarly, IPI TLB shootdowns must handle both the kernel and user
address spaces for each address.  Note that machines which implement
PCID but lack the INVPCID instruction cause the usual complications
in the IPI handlers, due to the need to temporarily switch to the
target PCID.  This is racy, but because we disable interrupts in
pmap_activate_sw() for the PCID/no-INVPCID case, an IPI handler
cannot see an inconsistent state of the CPU PCID vs. the PCPU
pmap/kcr3/ucr3 pointers.
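
In outline, the no-INVPCID activation path below makes the whole
transition atomic with respect to IPIs by disabling interrupts around
it.  A simplified, compilable sketch of the ordering (the pcpu struct
and the stubbed primitives only mimic their kernel namesakes; this is
not the kernel code):

    #include <stdint.h>

    #define CR3_PCID_SAVE (1ULL << 63)

    /* Toy per-CPU state mirroring the PCPU pmap/kcr3/ucr3 fields. */
    static struct {
            void *curpmap;
            uint64_t kcr3;
            uint64_t ucr3;
    } pcpu;
    static uint64_t cpu_cr3;  /* stands in for %cr3 */

    static uint64_t intr_disable(void) { return (0); }  /* would CLI */
    static void intr_restore(uint64_t rf) { (void)rf; } /* would restore */
    static void load_cr3(uint64_t v) { cpu_cr3 = v; }

    /*
     * All PCPU updates and the CR3 load happen with interrupts
     * disabled, so a shootdown IPI runs either entirely before or
     * entirely after the switch, and never sees the pcpu fields
     * disagreeing with the hardware PCID.
     */
    static void
    activate_no_invpcid(void *pmap, uint64_t kcr3, uint64_t ucr3)
    {
            uint64_t rflags;

            rflags = intr_disable();
            pcpu.curpmap = pmap;
            pcpu.kcr3 = kcr3 | CR3_PCID_SAVE;
            pcpu.ucr3 = ucr3 | CR3_PCID_SAVE;
            load_cr3(pcpu.kcr3);
            intr_restore(rflags);
    }

    int
    main(void)
    {
            activate_no_invpcid((void *)1, 0x100000 | 5, 0x200000 | 0x805);
            return (cpu_cr3 != 0 ? 0 : 1);
    }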

On the other hand, on kernel/user switches the CR3_PCID_SAVE bit is
set and we do not flush the TLB.
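
(CR3_PCID_SAVE is bit 63 of the value written to %cr3: with
CR4.PCIDE set, loading such a value switches page tables while
keeping TLB entries tagged with the target PCID instead of flushing
them.  A hedged sketch of such a load; load_cr3_noflush is an
illustrative name, not the kernel's helper, and the write is only
possible in kernel context:)

    #include <stdint.h>

    #define CR3_PCID_SAVE (1ULL << 63)  /* "no flush" bit of %cr3 */

    /*
     * Load %cr3 without invalidating TLB entries tagged with the new
     * PCID.  Only meaningful when CR4.PCIDE is enabled; shown here
     * for illustration only.
     */
    static inline void
    load_cr3_noflush(uint64_t cr3)
    {
            __asm __volatile("movq %0,%%cr3" : : "r" (cr3 | CR3_PCID_SAVE)
                : "memory");
    }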

I can imagine an alternative use of PCID, where only one PCID is
allocated for the kernel pmap.  Then there would be no need to shoot
down kernel TLB entries on a context switch.  But copyout(9) would
need either to use a method similar to proc_rwmem() to access the
userspace data, or (in reverse) to provide a temporary mapping of the
kernel buffer in the user-mode PCID and use a trampoline for the
copy.

Reviewed by:	markj (previous version)
Tested by:	pho
Discussed with:	alc (some aspects)
Sponsored by:	The FreeBSD Foundation
MFC after:	3 weeks
Differential revision:	https://reviews.freebsd.org/D13985
Commit:	c8f9c1f3d9 (parent e65c8c1afb)
Author:	Konstantin Belousov
Date:	2018-01-27 11:49:37 +00:00
Notes:	svn2git 2020-12-20 02:59:44 +00:00; svn path=/head/; revision=328470

10 changed files with 452 additions and 46 deletions


@@ -184,10 +184,14 @@ invltlb_ret:
call invltlb_pcid_handler
jmp invltlb_ret
INTR_HANDLER invltlb_invpcid
INTR_HANDLER invltlb_invpcid_nopti
call invltlb_invpcid_handler
jmp invltlb_ret
INTR_HANDLER invltlb_invpcid_pti
call invltlb_invpcid_pti_handler
jmp invltlb_ret
/*
* Single page TLB shootdown
*/
@@ -195,6 +199,14 @@ invltlb_ret:
call invlpg_handler
jmp invltlb_ret
INTR_HANDLER invlpg_invpcid
call invlpg_invpcid_handler
jmp invltlb_ret
INTR_HANDLER invlpg_pcid
call invlpg_pcid_handler
jmp invltlb_ret
/*
* Page range TLB shootdown.
*/
@@ -202,6 +214,14 @@ invltlb_ret:
call invlrng_handler
jmp invltlb_ret
INTR_HANDLER invlrng_invpcid
call invlrng_invpcid_handler
jmp invltlb_ret
INTR_HANDLER invlrng_pcid
call invlrng_pcid_handler
jmp invltlb_ret
/*
* Invalidate cache.
*/


@@ -133,20 +133,30 @@ cpu_mp_start(void)
/* Install an inter-CPU IPI for TLB invalidation */
if (pmap_pcid_enabled) {
if (invpcid_works) {
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_invpcid_pti) :
IDTVEC(invltlb_invpcid), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLTLB, pti ?
IDTVEC(invltlb_invpcid_pti_pti) :
IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT,
SEL_KPL, 0);
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) :
IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) :
IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0);
} else {
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) :
IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) :
IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) :
IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0);
}
} else {
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb),
SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
SDT_SYSIGT, SEL_KPL, 0);
}
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg),
SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng),
SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for cache invalidation. */
setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache),
@@ -439,10 +449,44 @@ invltlb_invpcid_handler(void)
PCPU_SET(smp_tlb_done, generation);
}
void
invltlb_invpcid_pti_handler(void)
{
struct invpcid_descr d;
uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
generation = smp_tlb_generation;
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
d.pad = 0;
d.addr = 0;
if (smp_tlb_pmap == kernel_pmap) {
/*
* This invalidation actually needs to clear kernel
* mappings from the TLB in the current pmap, but
* since we were asked for the flush in the kernel
* pmap, achieve it by performing global flush.
*/
invpcid(&d, INVPCID_CTXGLOB);
} else {
invpcid(&d, INVPCID_CTX);
d.pcid |= PMAP_PCID_USER_PT;
invpcid(&d, INVPCID_CTX);
}
PCPU_SET(smp_tlb_done, generation);
}
void
invltlb_pcid_handler(void)
{
uint32_t generation;
uint64_t kcr3, ucr3;
uint32_t generation, pcid;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
@@ -463,9 +507,132 @@ invltlb_pcid_handler(void)
* CPU.
*/
if (PCPU_GET(curpmap) == smp_tlb_pmap) {
load_cr3(smp_tlb_pmap->pm_cr3 |
smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
ucr3 = smp_tlb_pmap->pm_ucr3;
if (ucr3 != PMAP_NO_CR3) {
ucr3 |= PMAP_PCID_USER_PT | pcid;
pmap_pti_pcid_invalidate(ucr3, kcr3);
} else
load_cr3(kcr3);
}
}
PCPU_SET(smp_tlb_done, generation);
}
void
invlpg_invpcid_handler(void)
{
struct invpcid_descr d;
uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
d.addr = smp_tlb_addr1;
invpcid(&d, INVPCID_ADDR);
}
PCPU_SET(smp_tlb_done, generation);
}
void
invlpg_pcid_handler(void)
{
uint64_t kcr3, ucr3;
uint32_t generation;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
(ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
}
PCPU_SET(smp_tlb_done, generation);
}
void
invlrng_invpcid_handler(void)
{
struct invpcid_descr d;
vm_offset_t addr, addr2;
uint32_t generation;
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
d.addr = smp_tlb_addr1;
do {
invpcid(&d, INVPCID_ADDR);
d.addr += PAGE_SIZE;
} while (d.addr < addr2);
}
PCPU_SET(smp_tlb_done, generation);
}
void
invlrng_pcid_handler(void)
{
vm_offset_t addr, addr2;
uint64_t kcr3, ucr3;
uint32_t generation;
uint32_t pcid;
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
(ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
}
PCPU_SET(smp_tlb_done, generation);
}


@@ -1060,6 +1060,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
kernel_pmap->pm_cr3 = KPML4phys;
kernel_pmap->pm_ucr3 = PMAP_NO_CR3;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
kernel_pmap->pm_flags = pmap_flags;
@@ -1097,8 +1098,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
pmap_init_pat();
/* Initialize TLB Context Id. */
if (pti)
pmap_pcid_enabled = 0;
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
/* Check for INVPCID support */
@@ -1576,6 +1575,9 @@ void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
cpuset_t *mask;
struct invpcid_descr d;
uint64_t kcr3, ucr3;
uint32_t pcid;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
@@ -1592,9 +1594,32 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap))
if (pmap == PCPU_GET(curpmap)) {
invlpg(va);
else if (pmap_pcid_enabled)
if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) {
/*
* Disable context switching. pm_pcid
* is recalculated on switch, which
* might make us use wrong pcid below.
*/
critical_enter();
pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works) {
d.pcid = pcid | PMAP_PCID_USER_PT;
d.pad = 0;
d.addr = va;
invpcid(&d, INVPCID_ADDR);
} else {
kcr3 = pmap->pm_cr3 | pcid |
CR3_PCID_SAVE;
ucr3 = pmap->pm_ucr3 | pcid |
PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlpg(ucr3, kcr3, va);
}
critical_exit();
}
} else if (pmap_pcid_enabled)
pmap->pm_pcids[cpuid].pm_gen = 0;
if (pmap_pcid_enabled) {
CPU_FOREACH(i) {
@@ -1604,7 +1629,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
}
mask = &pmap->pm_active;
}
smp_masked_invlpg(*mask, va);
smp_masked_invlpg(*mask, va, pmap);
sched_unpin();
}
@@ -1615,7 +1640,10 @@ void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
cpuset_t *mask;
struct invpcid_descr d;
vm_offset_t addr;
uint64_t kcr3, ucr3;
uint32_t pcid;
u_int cpuid, i;
if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
@@ -1641,6 +1669,26 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
if (pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) {
critical_enter();
pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works) {
d.pcid = pcid | PMAP_PCID_USER_PT;
d.pad = 0;
d.addr = sva;
for (; d.addr < eva; d.addr +=
PAGE_SIZE)
invpcid(&d, INVPCID_ADDR);
} else {
kcr3 = pmap->pm_cr3 | pcid |
CR3_PCID_SAVE;
ucr3 = pmap->pm_ucr3 | pcid |
PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlrng(ucr3, kcr3, sva,
eva);
}
critical_exit();
}
} else if (pmap_pcid_enabled) {
pmap->pm_pcids[cpuid].pm_gen = 0;
}
@@ -1652,7 +1700,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
}
mask = &pmap->pm_active;
}
smp_masked_invlpg_range(*mask, sva, eva);
smp_masked_invlpg_range(*mask, sva, eva, pmap);
sched_unpin();
}
@@ -1661,6 +1709,8 @@ pmap_invalidate_all(pmap_t pmap)
{
cpuset_t *mask;
struct invpcid_descr d;
uint64_t kcr3, ucr3;
uint32_t pcid;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
@@ -1684,15 +1734,29 @@ pmap_invalidate_all(pmap_t pmap)
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
if (pmap_pcid_enabled) {
critical_enter();
pcid = pmap->pm_pcids[cpuid].pm_pcid;
if (invpcid_works) {
d.pcid = pmap->pm_pcids[cpuid].pm_pcid;
d.pcid = pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
if (pmap->pm_ucr3 != PMAP_NO_CR3) {
d.pcid |= PMAP_PCID_USER_PT;
invpcid(&d, INVPCID_CTX);
}
} else {
load_cr3(pmap->pm_cr3 | pmap->pm_pcids
[PCPU_GET(cpuid)].pm_pcid);
kcr3 = pmap->pm_cr3 | pcid;
ucr3 = pmap->pm_ucr3;
if (ucr3 != PMAP_NO_CR3) {
ucr3 |= pcid | PMAP_PCID_USER_PT;
pmap_pti_pcid_invalidate(ucr3,
kcr3);
} else {
load_cr3(kcr3);
}
}
critical_exit();
} else {
invltlb();
}
@@ -1797,6 +1861,9 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
struct invpcid_descr d;
uint64_t kcr3, ucr3;
uint32_t pcid;
if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) {
pmap->pm_eptgen++;
@@ -1805,16 +1872,35 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
KASSERT(pmap->pm_type == PT_X86,
("pmap_invalidate_range: unknown type %d", pmap->pm_type));
if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap))
if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) {
invlpg(va);
else if (pmap_pcid_enabled)
if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled &&
pmap->pm_ucr3 != PMAP_NO_CR3) {
critical_enter();
pcid = pmap->pm_pcids[0].pm_pcid;
if (invpcid_works) {
d.pcid = pcid | PMAP_PCID_USER_PT;
d.pad = 0;
d.addr = va;
invpcid(&d, INVPCID_ADDR);
} else {
kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
ucr3 = pmap->pm_ucr3 | pcid |
PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlpg(ucr3, kcr3, va);
}
critical_exit();
}
} else if (pmap_pcid_enabled)
pmap->pm_pcids[0].pm_gen = 0;
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
struct invpcid_descr d;
vm_offset_t addr;
uint64_t kcr3, ucr3;
if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) {
pmap->pm_eptgen++;
@@ -1826,6 +1912,25 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled &&
pmap->pm_ucr3 != PMAP_NO_CR3) {
critical_enter();
if (invpcid_works) {
d.pcid = pmap->pm_pcids[0].pm_pcid |
PMAP_PCID_USER_PT;
d.pad = 0;
d.addr = sva;
for (; d.addr < eva; d.addr += PAGE_SIZE)
invpcid(&d, INVPCID_ADDR);
} else {
kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].
pm_pcid | CR3_PCID_SAVE;
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[0].
pm_pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva);
}
critical_exit();
}
} else if (pmap_pcid_enabled) {
pmap->pm_pcids[0].pm_gen = 0;
}
@@ -1835,6 +1940,7 @@ void
pmap_invalidate_all(pmap_t pmap)
{
struct invpcid_descr d;
uint64_t kcr3, ucr3;
if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) {
pmap->pm_eptgen++;
@@ -1852,15 +1958,26 @@ pmap_invalidate_all(pmap_t pmap)
}
} else if (pmap == PCPU_GET(curpmap)) {
if (pmap_pcid_enabled) {
critical_enter();
if (invpcid_works) {
d.pcid = pmap->pm_pcids[0].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
if (pmap->pm_ucr3 != PMAP_NO_CR3) {
d.pcid |= PMAP_PCID_USER_PT;
invpcid(&d, INVPCID_CTX);
}
} else {
load_cr3(pmap->pm_cr3 | pmap->pm_pcids[0].
pm_pcid);
kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].pm_pcid;
if (pmap->pm_ucr3 != PMAP_NO_CR3) {
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[
0].pm_pcid | PMAP_PCID_USER_PT;
pmap_pti_pcid_invalidate(ucr3, kcr3);
} else
load_cr3(kcr3);
}
critical_exit();
} else {
invltlb();
}
@@ -2398,7 +2515,8 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
pmap->pm_pml4u = NULL;
pmap->pm_cr3 = KPML4phys;
pmap->pm_ucr3 = ~0UL;
/* hack to keep pmap_pti_pcid_invalidate() alive */
pmap->pm_ucr3 = PMAP_NO_CR3;
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -2408,7 +2526,7 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
pmap->pm_pcids[i].pm_gen = 0;
if (!pti)
__pcpu[i].pc_kcr3 = ~0ul;
__pcpu[i].pc_kcr3 = PMAP_NO_CR3;
}
PCPU_SET(curpmap, kernel_pmap);
pmap_activate(curthread);
@@ -2472,7 +2590,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
pmap->pm_pcids[i].pm_gen = 0;
}
pmap->pm_cr3 = ~0l; /* initialize to an invalid value */
pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */
pmap->pm_ucr3 = PMAP_NO_CR3;
pmap->pm_pml4u = NULL;
pmap->pm_type = pm_type;
@@ -7134,13 +7253,15 @@ pmap_pcid_alloc(pmap_t pmap, u_int cpuid)
CRITICAL_ASSERT(curthread);
gen = PCPU_GET(pcid_gen);
if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN ||
pmap->pm_pcids[cpuid].pm_gen == gen)
if (!pti && (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN ||
pmap->pm_pcids[cpuid].pm_gen == gen))
return (CR3_PCID_SAVE);
pcid_next = PCPU_GET(pcid_next);
KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x",
cpuid, pcid_next));
if (pcid_next == PMAP_PCID_OVERMAX) {
KASSERT((!pti && pcid_next <= PMAP_PCID_OVERMAX) ||
(pti && pcid_next <= PMAP_PCID_OVERMAX_KERN),
("cpu %d pcid_next %#x", cpuid, pcid_next));
if ((!pti && pcid_next == PMAP_PCID_OVERMAX) ||
(pti && pcid_next == PMAP_PCID_OVERMAX_KERN)) {
new_gen = gen + 1;
if (new_gen == 0)
new_gen = 1;
@@ -7159,7 +7280,8 @@ void
pmap_activate_sw(struct thread *td)
{
pmap_t oldpmap, pmap;
uint64_t cached, cr3;
struct invpcid_descr d;
uint64_t cached, cr3, kcr3, ucr3;
register_t rflags;
u_int cpuid;
@@ -7215,6 +7337,32 @@ pmap_activate_sw(struct thread *td)
PCPU_INC(pm_save_cnt);
}
PCPU_SET(curpmap, pmap);
if (pti) {
kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid;
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid |
PMAP_PCID_USER_PT;
/*
* Manually invalidate translations cached
* from the user page table, which are not
* flushed by reload of cr3 with the kernel
* page table pointer above.
*/
if (pmap->pm_ucr3 != PMAP_NO_CR3) {
if (invpcid_works) {
d.pcid = PMAP_PCID_USER_PT |
pmap->pm_pcids[cpuid].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
} else {
pmap_pti_pcid_invalidate(ucr3, kcr3);
}
}
PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE);
PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE);
}
if (!invpcid_works)
intr_restore(rflags);
} else if (cr3 != pmap->pm_cr3) {


@@ -802,3 +802,51 @@ msr_onfault:
movl $EFAULT,%eax
POP_FRAME_POINTER
ret
/*
* void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
* Invalidates address space addressed by ucr3, then returns to kcr3.
* Done in assembler to ensure no other memory accesses happen while
* on ucr3.
*/
ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
pushfq
cli
movq %rdi,%cr3 /* to user page table */
movq %rsi,%cr3 /* back to kernel */
popfq
retq
/*
* void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
* Invalidates virtual address va in address space ucr3, then returns to kcr3.
*/
ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
pushfq
cli
movq %rdi,%cr3 /* to user page table */
invlpg (%rdx)
movq %rsi,%cr3 /* back to kernel */
popfq
retq
/*
* void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
* vm_offset_t eva);
* Invalidates virtual addresses between sva and eva in address space ucr3,
* then returns to kcr3.
*/
ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
pushfq
cli
movq %rdi,%cr3 /* to user page table */
1: invlpg (%rdx)
addq $PAGE_SIZE,%rdx
cmpq %rdx,%rcx
ja 1b
movq %rsi,%cr3 /* back to kernel */
popfq
retq


@@ -225,6 +225,10 @@
#define PMAP_PCID_NONE 0xffffffff
#define PMAP_PCID_KERN 0
#define PMAP_PCID_OVERMAX 0x1000
#define PMAP_PCID_OVERMAX_KERN 0x800
#define PMAP_PCID_USER_PT 0x800
#define PMAP_NO_CR3 (~0UL)
#ifndef LOCORE
@@ -433,6 +437,10 @@ boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec);
void pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva);
void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
vm_offset_t eva);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */


@@ -28,15 +28,23 @@ extern u_int32_t mptramp_pagetables;
/* IPI handlers */
inthand_t
IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
IDTVEC(invltlb_invpcid),/* TLB shootdowns - global, invpcid */
IDTVEC(justreturn), /* interrupt CPU with minimum overhead */
IDTVEC(invltlb_pcid_pti),
IDTVEC(invltlb_invpcid_pti),
IDTVEC(justreturn1_pti),
IDTVEC(invltlb_pti),
IDTVEC(invltlb_pcid_pti),
IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
IDTVEC(invltlb_invpcid_pti_pti),
IDTVEC(invltlb_invpcid_nopti),
IDTVEC(invlpg_pti),
IDTVEC(invlpg_invpcid_pti),
IDTVEC(invlpg_invpcid),
IDTVEC(invlpg_pcid_pti),
IDTVEC(invlpg_pcid),
IDTVEC(invlrng_pti),
IDTVEC(invlrng_invpcid_pti),
IDTVEC(invlrng_invpcid),
IDTVEC(invlrng_pcid_pti),
IDTVEC(invlrng_pcid),
IDTVEC(invlcache_pti),
IDTVEC(ipi_intr_bitmap_handler_pti),
IDTVEC(cpustop_pti),
@@ -45,6 +53,11 @@ inthand_t
void invltlb_pcid_handler(void);
void invltlb_invpcid_handler(void);
void invltlb_invpcid_pti_handler(void);
void invlpg_invpcid_handler(void);
void invlpg_pcid_handler(void);
void invlrng_invpcid_handler(void);
void invlrng_pcid_handler(void);
int native_start_all_aps(void);
#endif /* !LOCORE */


@@ -1045,7 +1045,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
smp_masked_invlpg(*mask, va);
smp_masked_invlpg(*mask, va, pmap);
sched_unpin();
}
@@ -1079,7 +1079,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
CPU_AND(&other_cpus, &pmap->pm_active);
mask = &other_cpus;
}
smp_masked_invlpg_range(*mask, sva, eva);
smp_masked_invlpg_range(*mask, sva, eva, pmap);
sched_unpin();
}


@@ -768,7 +768,7 @@ sf_buf_shootdown(struct sf_buf *sf, int flags)
CPU_NAND(&other_cpus, &sf->cpumask);
if (!CPU_EMPTY(&other_cpus)) {
CPU_OR(&sf->cpumask, &other_cpus);
smp_masked_invlpg(other_cpus, sf->kva);
smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap);
}
}
sched_unpin();


@@ -39,6 +39,7 @@ extern int cpu_logical;
extern int cpu_cores;
extern volatile uint32_t smp_tlb_generation;
extern struct pmap *smp_tlb_pmap;
extern vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
extern u_int xhits_gbl[];
extern u_int xhits_pg[];
extern u_int xhits_rng[];
@@ -97,9 +98,9 @@ void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap);
void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
vm_offset_t endva, struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
void mem_range_AP_init(void);
void topo_probe(void);


@@ -1506,7 +1506,7 @@ SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
*/
/* Variables needed for SMP tlb shootdown. */
static vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
pmap_t smp_tlb_pmap;
volatile uint32_t smp_tlb_generation;
@@ -1583,11 +1583,11 @@ smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
}
void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0);
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
@@ -1595,11 +1595,12 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
}
void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
pmap_t pmap)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL,
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap,
addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;