Implement support for process-context identifiers ('PCID') on Intel
CPUs.  The feature tags TLB entries with the ID of the address space,
which makes it possible to avoid TLB invalidation on context switch;
it is available only in long mode.  In microbenchmarks, using PCID
decreased context-switch latency by ~30% on Sandy Bridge class
desktop CPUs, as measured with the lat_ctx program from lmbench.
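
As an illustration (not part of the commit), here is a minimal C
sketch of how a PCID-tagged %cr3 value is composed.  The layout
follows the Intel SDM: the PCID occupies bits 0-11 of %cr3, and bit
63 (CR3_PCID_SAVE in this patch, the "no-flush" bit in the SDM) asks
the CPU not to invalidate the entries tagged with that PCID on the
load:

#include <stdint.h>

#define	CR3_PCID_MASK	0xfffULL	/* PCID in bits 0..11 of %cr3 */
#define	CR3_PCID_SAVE	(1ULL << 63)	/* do not flush this PCID */

/*
 * Compose %cr3 from the physical address of the PML4 page and the
 * address-space PCID.  With 'save' set, loading the result switches
 * page tables without invalidating TLB entries tagged with 'pcid'.
 */
static inline uint64_t
pcid_cr3_compose(uint64_t pml4_phys, uint16_t pcid, int save)
{
	uint64_t cr3;

	cr3 = (pml4_phys & ~CR3_PCID_MASK) | (pcid & CR3_PCID_MASK);
	if (save)
		cr3 |= CR3_PCID_SAVE;
	return (cr3);
}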

If available, use the INVPCID instruction when a TLB entry in a
non-current address space needs to be invalidated.  The instruction
is available starting with Haswell.
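
For reference, a hedged sketch of an invpcid() wrapper in the style
the patch uses: a 16-byte descriptor selects the PCID and linear
address, %eax selects the invalidation type, and the instruction is
emitted as raw bytes because assemblers of the time did not know the
mnemonic.  The names mirror the patch (struct invpcid_descr,
INVPCID_ADDR, INVPCID_CTX, INVPCID_CTXGLOB); the wrapper itself is
illustrative:

#include <stdint.h>

/* Layout of the 16-byte INVPCID descriptor (Intel SDM Vol. 2A). */
struct invpcid_descr {
	uint64_t pcid:12;	/* PCID to operate on */
	uint64_t pad:52;	/* must be zero */
	uint64_t addr;		/* linear address, for INVPCID_ADDR */
} __attribute__((packed));

#define	INVPCID_ADDR	0	/* one address in one PCID */
#define	INVPCID_CTX	1	/* everything for one PCID */
#define	INVPCID_CTXGLOB	2	/* all PCIDs, including global entries */

static inline void
invpcid(struct invpcid_descr *d, int type)
{

	/*
	 * invpcid (%rdx),%rax, encoded by hand with the same bytes
	 * the patch emits: 66 0f 38 82 with modrm 0x02 = (%rdx),%rax.
	 */
	__asm__ __volatile__(".byte 0x66,0x0f,0x38,0x82,0x02"
	    : : "d" (d), "a" ((uint64_t)type) : "memory");
}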

If needed, the use of PCID can be disabled by setting the
vm.pmap.pcid_enabled loader tunable to 0.  The state of the feature
is reported by the vm.pmap.pcid_enabled sysctl.  The
vm.pmap.pcid_save_cnt sysctl reports the number of context switches
that avoided invalidating the TLB; compare it with the total number
of context switches, available as the vm.stats.sys.v_swtch sysctl.
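
For example, the save ratio can be computed from userland with
sysctlbyname(3); a minimal sketch, assuming v_swtch has the u_int
width it has in <sys/vmmeter.h>:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t saved;
	u_int total;
	size_t len;

	len = sizeof(saved);
	if (sysctlbyname("vm.pmap.pcid_save_cnt", &saved, &len,
	    NULL, 0) == -1)
		return (1);
	len = sizeof(total);
	if (sysctlbyname("vm.stats.sys.v_swtch", &total, &len,
	    NULL, 0) == -1)
		return (1);
	printf("%ju of %u context switches avoided a TLB flush (%.1f%%)\n",
	    (uintmax_t)saved, total,
	    total != 0 ? 100.0 * saved / total : 0.0);
	return (0);
}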

Sponsored by:	The FreeBSD Foundation
Reviewed by:	alc
Tested by:	pho, bf
Author:	Konstantin Belousov
Date:	2013-08-30 07:59:49 +00:00
parent a0887a4c2d
commit 37eed8419c
10 changed files with 519 additions and 105 deletions
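
The core of the change is the swact path in cpu_switch.S below.  A
hedged C rendition of its logic, with simplified stand-ins for the
kernel structures (pm_save is really a cpuset_t, and the counter is
the per-CPU pc_pm_save_cnt field):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define	CR3_PCID_SAVE	(1ULL << 63)

struct pmap_sketch {
	_Atomic uint64_t pm_save;	/* CPUs with live entries for our PCID */
	int pm_pcid;			/* -1 if no PCID allocated */
};

static uint64_t pm_save_cnt;		/* per-CPU counter in the kernel */

static void
load_cr3(uint64_t cr3)
{
	(void)cr3;	/* privileged MOV to %cr3 in the real kernel */
}

/* Previous state of the bit, like the assembly's "lock btsl". */
static bool
test_and_set_cpu(_Atomic uint64_t *set, unsigned cpu)
{
	return ((atomic_fetch_or(set, 1ULL << cpu) >> cpu) & 1);
}

static void
swact_sketch(struct pmap_sketch *pmap, uint64_t new_cr3, unsigned cpuid)
{
	if (pmap->pm_pcid != -1 && test_and_set_cpu(&pmap->pm_save, cpuid)) {
		/* Our TLB entries survived: skip the implicit flush. */
		new_cr3 |= CR3_PCID_SAVE;
		pm_save_cnt++;
	}
	load_cr3(new_cr3);
	/*
	 * Another CPU may have invalidated the pmap and cleared our
	 * pm_save bit between the test above and the %cr3 load.
	 * Re-set the bit; if it had been cleared, reload %cr3 without
	 * CR3_PCID_SAVE so the stale entries of this PCID are flushed.
	 */
	if ((new_cr3 & CR3_PCID_SAVE) != 0 &&
	    !test_and_set_cpu(&pmap->pm_save, cpuid)) {
		pm_save_cnt--;
		load_cr3(new_cr3 & ~CR3_PCID_SAVE);
	}
}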

sys/amd64/amd64/apic_vector.S

@@ -43,6 +43,12 @@
#include "assym.s"
#ifdef SMP
#define LK lock ;
#else
#define LK
#endif
/*
* I/O Interrupt Entry Point. Rather than having one entry point for
* each interrupt source, we use one entry point for each 32-bit word
@@ -149,6 +155,38 @@ IDTVEC(xen_intr_upcall)
* Global address space TLB shootdown.
*/
.text
#define NAKE_INTR_CS 24
SUPERALIGN_TEXT
global_invltlb:
movq %cr4,%rax
andq $~0x80,%rax	/* clear CR4_PGE */
movq %rax,%cr4
orq $0x80,%rax		/* set CR4_PGE, flushing the whole TLB */
movq %rax,%cr4
invltlb_ret_clear_pm_save:
movq smp_tlb_pmap,%rdx
testq %rdx,%rdx
jz invltlb_ret
testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
jz 1f
swapgs
1:
movl PCPU(CPUID),%eax
jz 2f
swapgs
2:
LK btcl %eax,PM_SAVE(%rdx)
SUPERALIGN_TEXT
invltlb_ret:
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
LK incl smp_tlb_wait
popq %rdx
popq %rax
jmp doreti_iret
SUPERALIGN_TEXT
IDTVEC(invltlb)
#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
@@ -165,18 +203,44 @@ IDTVEC(invltlb)
#endif
pushq %rax
pushq %rdx
movq %cr3, %rax /* invalidate the TLB */
movq %rax, %cr3
movq %cr3,%rax
cmpl $0,pmap_pcid_enabled
je 2f
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
movq $smp_tlb_invpcid,%rdx
cmpl $0,(%rdx)
je global_invltlb
cmpl $-1,(%rdx)
je global_invltlb
lock
incl smp_tlb_wait
/*
* Non-zero smp_tlb_invpcid, only invalidate TLB for entries with
* current PCID.
*/
cmpl $0,invpcid_works
je 1f
/* Use invpcid if available. */
movl $1,%eax /* INVPCID_CTX */
/* invpcid (%rdx),%rax */
.byte 0x66,0x0f,0x38,0x82,0x02
jmp invltlb_ret_clear_pm_save
1:
/* Otherwise reload %cr3 twice. */
movq pcid_cr3,%rdx
cmpq %rax,%rdx
je 2f
movq %rdx,%cr3 /* Invalidate, bit 63 is zero. */
btsq $63,%rax
popq %rax
jmp doreti_iret
/*
* Invalidate the TLB if PCID is not enabled.
* Restore the old address space.
*/
2:
movq %rax,%cr3
jmp invltlb_ret_clear_pm_save
/*
* Single page TLB shootdown
@@ -198,18 +262,54 @@ IDTVEC(invlpg)
#endif
pushq %rax
pushq %rdx
movq $smp_tlb_invpcid,%rdx
cmpl $0,pmap_pcid_enabled
je 3f
cmpl $0,invpcid_works
jne 2f
movq smp_tlb_addr1, %rax
invlpg (%rax) /* invalidate single page */
/* kernel pmap - use invlpg to invalidate global mapping */
cmpl $0,(%rdx)
je 3f
cmpl $-1,(%rdx)
je global_invltlb
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
/*
* PCID supported, but INVPCID is not.
* Temporarily switch to the target address space and do INVLPG.
*/
pushq %rcx
movq %cr3,%rcx
movq pcid_cr3,%rax
cmp %rcx,%rax
je 1f
btsq $63,%rax
movq %rax,%cr3
1: movq 8(%rdx),%rax
invlpg (%rax)
btsq $63,%rcx
movq %rcx,%cr3
popq %rcx
jmp invltlb_ret
lock
incl smp_tlb_wait
/*
* Invalidate the TLB entry using INVPCID_ADDR.
*/
2:
xorl %eax,%eax
/* invpcid (%rdx),%rax */
.byte 0x66,0x0f,0x38,0x82,0x02
jmp invltlb_ret
popq %rax
jmp doreti_iret
/*
* PCID is not supported or kernel pmap.
* Invalidate single page using INVLPG.
*/
3:
movq 8(%rdx),%rax
invlpg (%rax)
jmp invltlb_ret
/*
* Page range TLB shootdown.
@@ -232,23 +332,76 @@ IDTVEC(invlrng)
pushq %rax
pushq %rdx
movq $smp_tlb_invpcid,%rdx
cmpl $0,pmap_pcid_enabled
jne invlrng_single_page
cmpl $0,invpcid_works
jne invlrng_invpcid
movq smp_tlb_addr1, %rdx
movq smp_tlb_addr2, %rax
/* kernel pmap - use invlpg to invalidate global mapping */
cmpl $0,(%rdx)
je invlrng_single_page
cmpl $-1,(%rdx)
je global_invltlb
pushq %rcx
movq %cr3,%rcx
movq pcid_cr3,%rax
cmpq %rcx,%rax
je 1f
btsq $63,%rax
movq %rax,%cr3
1:
movq 8(%rdx),%rdx
movq smp_tlb_addr2,%rax
2:
invlpg (%rdx)
addq $PAGE_SIZE,%rdx
cmpq %rax,%rdx
jb 2b
btsq $63,%rcx
movq %rcx,%cr3
popq %rcx
jmp invltlb_ret
invlrng_invpcid:
testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
jz 1f
swapgs
1:
pushq %rcx
movq (%rdx),%rcx
movq %rcx,PCPU(INVPCID_DESCR)
movq 8(%rdx),%rax
movq %rax,PCPU(INVPCID_DESCR)+8
movq smp_tlb_addr2,%rcx
xorl %eax,%eax
movq $PC_INVPCID_DESCR,%rdx
gs
subq 8(%rdx),%rcx
shrq $PAGE_SHIFT,%rcx
2:
gs
/* invpcid (%rdx),%rax */
.byte 0x66,0x0f,0x38,0x82,0x02
gs
addq $PAGE_SIZE,8(%rdx)
dec %rcx
jne 2b
popq %rcx
testb $SEL_RPL_MASK,NAKE_INTR_CS(%rsp)
jz invltlb_ret
swapgs
jmp invltlb_ret
invlrng_single_page:
movq 8(%rdx),%rdx
movq smp_tlb_addr2,%rax
1: invlpg (%rdx) /* invalidate single page */
addq $PAGE_SIZE, %rdx
cmpq %rax, %rdx
addq $PAGE_SIZE,%rdx
cmpq %rax,%rdx
jb 1b
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popq %rdx
popq %rax
jmp doreti_iret
jmp invltlb_ret
/*
* Invalidate cache.
@@ -265,17 +418,9 @@ IDTVEC(invlcache)
#endif
pushq %rax
pushq %rdx
wbinvd
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popq %rax
jmp doreti_iret
jmp invltlb_ret
/*
* Handler for IPIs sent via the per-cpu IPI bitmap.

sys/amd64/amd64/cpu_switch.S

@@ -77,8 +77,7 @@ ENTRY(cpu_throw)
LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */
1:
movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */
movq PCB_CR3(%r8),%rdx
movq %rdx,%cr3 /* new address space */
movq PCB_CR3(%r8),%rcx /* new address space */
jmp swact
END(cpu_throw)
@@ -145,20 +144,41 @@ ctx_switch_xsave:
SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
jmp sw1
swinact:
movq %rcx,%cr3 /* new address space */
movl PCPU(CPUID), %eax
movl PCPU(CPUID),%eax
/* Release bit from old pmap->pm_active */
movq PCPU(CURPMAP),%rcx
LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */
SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
movq PCPU(CURPMAP),%r12
LK btrl %eax,PM_ACTIVE(%r12) /* clear old */
SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */
swact:
/* Set bit in new pmap->pm_active */
movq TD_PROC(%rsi),%rdx /* newproc */
movq P_VMSPACE(%rdx), %rdx
addq $VM_PMAP,%rdx
cmpl $-1,PM_PCID(%rdx)
je 1f
LK btsl %eax,PM_SAVE(%rdx)
jnc 1f
btsq $63,%rcx /* CR3_PCID_SAVE */
incq PCPU(PM_SAVE_CNT)
1:
movq %rcx,%cr3 /* new address space */
LK btsl %eax,PM_ACTIVE(%rdx) /* set new */
movq %rdx,PCPU(CURPMAP)
/*
* We might lose the race and other CPU might have changed
* the pmap after we set our bit in pmap->pm_save. Recheck.
* Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
* modified, causing TLB flush for this pcid.
*/
btrq $63,%rcx
jnc 1f
LK btsl %eax,PM_SAVE(%rdx)
jc 1f
decq PCPU(PM_SAVE_CNT)
movq %rcx,%cr3
1:
sw1:
#if defined(SCHED_ULE) && defined(SMP)
/* Wait for the new thread to become unblocked */

sys/amd64/amd64/genassym.c

@@ -76,6 +76,8 @@ __FBSDID("$FreeBSD$");
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
@@ -225,6 +227,8 @@ ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
ASSYM(PC_PM_SAVE_CNT, offsetof(struct pcpu, pc_pm_save_cnt));
ASSYM(PC_INVPCID_DESCR, offsetof(struct pcpu, pc_invpcid_descr));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));

sys/amd64/amd64/machdep.c

@@ -1909,7 +1909,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
thread0.td_pcb->pcb_cr3 = KPML4phys;
thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
thread0.td_frame = &proc0_tf;
env = getenv("kernelname");

sys/amd64/amd64/mp_machdep.c

@@ -107,9 +107,11 @@ struct pcb stoppcbs[MAXCPU];
struct pcb **susppcbs;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
struct invpcid_descr smp_tlb_invpcid;
volatile int smp_tlb_wait;
uint64_t pcid_cr3;
pmap_t smp_tlb_pmap;
#ifdef COUNT_IPIS
/* Interrupt counts. */
@@ -603,6 +605,8 @@ cpu_mp_announce(void)
}
}
extern int pmap_pcid_enabled;
/*
* AP CPU's call this to initialize themselves.
*/
@@ -768,6 +772,8 @@ init_secondary(void)
*/
load_cr4(rcr4() | CR4_PGE);
if (pmap_pcid_enabled)
load_cr4(rcr4() | CR4_PCIDE);
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_ufssel);
@@ -1119,7 +1125,8 @@ ipi_send_cpu(int cpu, u_int ipi)
* Flush the TLB on all other CPU's
*/
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
vm_offset_t addr2)
{
u_int ncpu;
@@ -1129,8 +1136,16 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_invpcid.addr = addr1;
if (pmap == NULL) {
smp_tlb_invpcid.pcid = 0;
} else {
smp_tlb_invpcid.pcid = pmap->pm_pcid;
pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
(pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
}
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
@@ -1139,7 +1154,8 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
}
static void
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
vm_offset_t addr1, vm_offset_t addr2)
{
int cpu, ncpu, othercpus;
@@ -1155,8 +1171,16 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_of
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_invpcid.addr = addr1;
if (pmap == NULL) {
smp_tlb_invpcid.pcid = 0;
} else {
smp_tlb_invpcid.pcid = pmap->pm_pcid;
pcid_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) |
(pmap->pm_pcid == -1 ? 0 : pmap->pm_pcid);
}
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
if (CPU_ISFULLSET(&mask)) {
ncpu = othercpus;
@@ -1182,15 +1206,15 @@ smp_cache_flush(void)
{
if (smp_started)
smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
}
void
smp_invltlb(void)
smp_invltlb(pmap_t pmap)
{
if (smp_started) {
smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
@@ -1198,11 +1222,11 @@ smp_invltlb(void)
}
void
smp_invlpg(vm_offset_t addr)
smp_invlpg(pmap_t pmap, vm_offset_t addr)
{
if (smp_started) {
smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
@@ -1210,11 +1234,11 @@ smp_invlpg(vm_offset_t addr)
}
void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@@ -1223,11 +1247,11 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
}
void
smp_masked_invltlb(cpuset_t mask)
smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, NULL, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
@@ -1235,11 +1259,11 @@ smp_masked_invltlb(cpuset_t mask)
}
void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
@@ -1247,11 +1271,13 @@ smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
}
void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
vm_offset_t addr2)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;

sys/amd64/amd64/pmap.c

@@ -116,11 +116,8 @@ __FBSDID("$FreeBSD$");
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/_unrhdr.h>
#include <sys/smp.h>
#else
#include <sys/cpuset.h>
#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -250,6 +247,53 @@ static struct md_page *pv_table;
pt_entry_t *CMAP1 = 0;
caddr_t CADDR1 = 0;
static struct unrhdr pcid_unr;
static struct mtx pcid_mtx;
int pmap_pcid_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN, &pmap_pcid_enabled,
0, "Is TLB Context ID enabled ?");
int invpcid_works = 0;
/*
* Perform the guaranteed invalidation of all TLB entries. This
* includes the global entries, and entries in all PCIDs, not only the
* current context. The function works both on non-PCID CPUs and CPUs
* with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1
* Operations that Invalidate TLBs and Paging-Structure Caches.
*/
static __inline void
invltlb_globpcid(void)
{
uint64_t cr4;
cr4 = rcr4();
load_cr4(cr4 & ~CR4_PGE);
/*
* Although preemption at this point could be detrimental to
* performance, it would not lead to an error. PG_G is simply
* ignored if CR4.PGE is clear. Moreover, in case this block
* is re-entered, the load_cr4() either above or below will
* modify CR4.PGE flushing the TLB.
*/
load_cr4(cr4 | CR4_PGE);
}
static int
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
{
int i;
uint64_t res;
res = 0;
CPU_FOREACH(i) {
res += cpuid_to_pcpu[i]->pc_pm_save_cnt;
}
return (sysctl_handle_64(oidp, &res, 0, req));
}
SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
/*
* Crashdump maps.
*/
@@ -685,6 +729,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
CPU_ZERO(&kernel_pmap->pm_save);
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
/*
@@ -716,6 +761,21 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/* Initialize the PAT MSR. */
pmap_init_pat();
#ifdef SMP
/* Initialize TLB Context Id. */
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
load_cr4(rcr4() | CR4_PCIDE);
mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
/* Check for INVPCID support */
invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
!= 0;
kernel_pmap->pm_pcid = 0;
} else
#endif
pmap_pcid_enabled = 0;
}
/*
@@ -952,7 +1012,6 @@ pmap_cache_bits(int mode, boolean_t is_pde)
static void
pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
{
u_long cr4;
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
@@ -968,19 +1027,34 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
* Promotion: flush every 4KB page mapping from the TLB,
* including any global (PG_G) mappings.
*/
cr4 = rcr4();
load_cr4(cr4 & ~CR4_PGE);
/*
* Although preemption at this point could be detrimental to
* performance, it would not lead to an error. PG_G is simply
* ignored if CR4.PGE is clear. Moreover, in case this block
* is re-entered, the load_cr4() either above or below will
* modify CR4.PGE flushing the TLB.
*/
load_cr4(cr4 | CR4_PGE);
invltlb_globpcid();
}
}
#ifdef SMP
static void
pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
{
struct invpcid_descr d;
uint64_t cr3;
if (invpcid_works) {
d.pcid = pmap->pm_pcid;
d.pad = 0;
d.addr = va;
invpcid(&d, INVPCID_ADDR);
return;
}
cr3 = rcr3();
critical_enter();
load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
CR3_PCID_SAVE);
invlpg(va);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
}
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
*
@@ -1008,21 +1082,68 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invlpg(va);
smp_invlpg(va);
if (!pmap_pcid_enabled) {
invlpg(va);
} else {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
if (pmap == PCPU_GET(curpmap))
invlpg(va);
else
pmap_invalidate_page_pcid(pmap, va);
} else {
invltlb_globpcid();
}
}
smp_invlpg(pmap, va);
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
if (CPU_ISSET(cpuid, &pmap->pm_active))
invlpg(va);
CPU_AND(&other_cpus, &pmap->pm_active);
else if (pmap_pcid_enabled) {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
pmap_invalidate_page_pcid(pmap, va);
else
invltlb_globpcid();
}
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
smp_masked_invlpg(other_cpus, va);
smp_masked_invlpg(other_cpus, pmap, va);
}
sched_unpin();
}
static void
pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
struct invpcid_descr d;
uint64_t cr3;
vm_offset_t addr;
if (invpcid_works) {
d.pcid = pmap->pm_pcid;
d.pad = 0;
for (addr = sva; addr < eva; addr += PAGE_SIZE) {
d.addr = addr;
invpcid(&d, INVPCID_ADDR);
}
return;
}
cr3 = rcr3();
critical_enter();
load_cr3(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4) | pmap->pm_pcid |
CR3_PCID_SAVE);
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
@@ -1032,19 +1153,43 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
if (!pmap_pcid_enabled) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
} else {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva;
addr += PAGE_SIZE)
invlpg(addr);
} else {
pmap_invalidate_range_pcid(pmap,
sva, eva);
}
} else {
invltlb_globpcid();
}
}
smp_invlpg_range(pmap, sva, eva);
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
if (CPU_ISSET(cpuid, &pmap->pm_active))
if (CPU_ISSET(cpuid, &pmap->pm_active)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
CPU_AND(&other_cpus, &pmap->pm_active);
} else if (pmap_pcid_enabled) {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
pmap_invalidate_range_pcid(pmap, sva, eva);
else
invltlb_globpcid();
}
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
smp_masked_invlpg_range(other_cpus, sva, eva);
smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
}
sched_unpin();
}
@@ -1053,21 +1198,63 @@ void
pmap_invalidate_all(pmap_t pmap)
{
cpuset_t other_cpus;
struct invpcid_descr d;
uint64_t cr3;
u_int cpuid;
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
invltlb();
smp_invltlb();
cpuid = PCPU_GET(cpuid);
if (pmap == kernel_pmap ||
(pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) ||
!CPU_CMP(&pmap->pm_active, &all_cpus)) {
if (invpcid_works) {
bzero(&d, sizeof(d));
invpcid(&d, INVPCID_CTXGLOB);
} else {
invltlb_globpcid();
}
CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
smp_invltlb(pmap);
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
if (CPU_ISSET(cpuid, &pmap->pm_active))
/*
* This logic is duplicated in the Xinvltlb shootdown
* IPI handler.
*/
if (pmap_pcid_enabled) {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
if (invpcid_works) {
d.pcid = pmap->pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
} else {
cr3 = rcr3();
critical_enter();
/*
* Bit 63 is clear, pcid TLB
* entries are invalidated.
*/
load_cr3(DMAP_TO_PHYS((vm_offset_t)
pmap->pm_pml4) | pmap->pm_pcid);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
}
} else {
invltlb_globpcid();
}
} else if (CPU_ISSET(cpuid, &pmap->pm_active))
invltlb();
CPU_AND(&other_cpus, &pmap->pm_active);
CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
smp_masked_invltlb(other_cpus);
smp_masked_invltlb(other_cpus, pmap);
}
sched_unpin();
}
@@ -1129,8 +1316,10 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
CPU_CLR(cpuid, &other_cpus);
if (pmap == kernel_pmap)
active = all_cpus;
else
else {
active = pmap->pm_active;
CPU_AND_ATOMIC(&pmap->pm_save, &active);
}
if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpuid;
act.invalidate = active;
@@ -1193,6 +1382,8 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
pde_store(pde, newpde);
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
pmap_update_pde_invalidate(va, newpde);
else
CPU_ZERO(&pmap->pm_save);
}
#endif /* !SMP */
@@ -1675,6 +1866,8 @@ pmap_pinit0(pmap_t pmap)
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
CPU_ZERO(&pmap->pm_save);
}
/*
@@ -1716,6 +1909,8 @@ pmap_pinit(pmap_t pmap)
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_pcid = pmap_pcid_enabled ? alloc_unr(&pcid_unr) : -1;
CPU_ZERO(&pmap->pm_save);
return (1);
}
@@ -1957,6 +2152,14 @@ pmap_release(pmap_t pmap)
KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap has reserved page table page(s)"));
if (pmap_pcid_enabled) {
/*
* Invalidate any left TLB entries, to allow the reuse
* of the pcid.
*/
pmap_invalidate_all(pmap);
}
m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
for (i = 0; i < NKPML4E; i++) /* KVA */
@@ -1968,6 +2171,8 @@ pmap_release(pmap_t pmap)
m->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_free_zero(m);
if (pmap->pm_pcid != -1)
free_unr(&pcid_unr, pmap->pm_pcid);
}
static int
@@ -5734,15 +5939,20 @@ pmap_activate(struct thread *td)
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
CPU_ZERO(&pmap->pm_save);
cpuid = PCPU_GET(cpuid);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
#else
CPU_CLR(cpuid, &oldpmap->pm_active);
CPU_SET(cpuid, &pmap->pm_active);
CPU_SET(cpuid, &pmap->pm_save);
#endif
cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
if (pmap->pm_pcid != -1)
cr3 |= pmap->pm_pcid;
td->td_pcb->pcb_cr3 = cr3;
load_cr3(cr3);
PCPU_SET(curpmap, pmap);

sys/amd64/amd64/vm_machdep.c

@@ -221,6 +221,8 @@ cpu_fork(td1, p2, td2, flags)
*/
pmap2 = vmspace_pmap(p2->p_vmspace);
pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
if (pmap2->pm_pcid != -1)
pcb2->pcb_cr3 |= pmap2->pm_pcid;
pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
pcb2->pcb_rbp = 0;
pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);

sys/amd64/include/pcpu.h

@@ -67,6 +67,8 @@
struct system_segment_descriptor *pc_ldt; \
/* Pointer to the CPU TSS descriptor */ \
struct system_segment_descriptor *pc_tss; \
uint64_t pc_pm_save_cnt; \
char pc_invpcid_descr[16]; \
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
uint64_t pc_dbreg[16]; /* ddb debugging regs */ \
int pc_dbreg_cmd; /* ddb debugging reg cmd */ \

sys/amd64/include/pmap.h

@@ -240,6 +240,8 @@ struct pmap {
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
cpuset_t pm_save; /* Context valid on cpus mask */
int pm_pcid; /* context id */
/* spare u_int here due to padding */
struct pmap_statistics pm_stats; /* pmap statistics */
struct vm_radix pm_root; /* spare page table pages */

sys/amd64/include/smp.h

@@ -54,6 +54,8 @@ inthand_t
IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
IDTVEC(rendezvous); /* handle CPU rendezvous */
struct pmap;
/* functions in mp_machdep.c */
void cpu_add(u_int apic_id, char boot_cpu);
void cpustop_handler(void);
@@ -67,13 +69,14 @@ int ipi_nmi_handler(void);
void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
void smp_cache_flush(void);
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
void smp_invlpg(struct pmap *pmap, vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
void smp_invlpg_range(struct pmap *pmap, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
void smp_masked_invltlb(cpuset_t mask);
void smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
vm_offset_t startva, vm_offset_t endva);
void smp_invltlb(struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
#endif /* !LOCORE */
#endif /* SMP */