Rewrite the amd64 PCID implementation to follow the algorithm described in
Vahalia's "Unix Internals", section 15.12 "Other TLB Consistency
Algorithms".  The same algorithm is already used by the MIPS pmap
to handle ASIDs.

The PCID for an address space is now allocated per-cpu during the context
switch to a thread using the pmap, when either no PCID was ever allocated
for the pmap on that cpu, or the current PCID has been invalidated.  If the
PCID is reused, bit 63 of %cr3 can be set to avoid the TLB flush.
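
As a rough illustration (a sketch, not the committed code), the value
loaded into %cr3 on context switch combines the page-table root, the
per-cpu PCID, and, when the PCID can be reused, the no-flush bit:

#include <stdint.h>

#define CR3_PCID_SAVE	0x8000000000000000ULL	/* bit 63; mirrors x86/specialreg.h */

/*
 * Sketch: compose the %cr3 value for a user pmap.  "cached" is nonzero
 * only when the previously allocated PCID is still valid on this cpu,
 * in which case the TLB entries tagged with that PCID may be kept.
 */
static uint64_t
pcid_cr3(uint64_t pm_cr3, uint32_t pcid, int cached)
{
	uint64_t cr3;

	cr3 = pm_cr3 | pcid;		/* the PCID occupies bits 0..11 */
	if (cached)
		cr3 |= CR3_PCID_SAVE;	/* do not flush this PCID's entries */
	return (cr3);
}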

Each cpu keeps a generation count for the PCID algorithm, which is saved
in the pmap's per-cpu block when a per-cpu PCID is allocated.  On
invalidation, the pmap's generation count is zeroed, which signals to the
context switch code that the already allocated PCID is no longer valid.
The effect is a TLB shootdown for the given cpu/address space, performed
lazily through the allocation of a new PCID.
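
A minimal sketch of the per-cpu generation scheme follows; it loosely
mirrors the pmap_pcid_alloc() added in the patch below, using simplified,
illustrative names rather than the committed ones:

#include <stdbool.h>
#include <stdint.h>

#define PCID_KERN	0	/* reserved for the kernel pmap */
#define PCID_OVERMAX	0x1000	/* 12-bit PCID space exhausted */

struct cpu_pcid_state {		/* per-cpu allocator state */
	uint32_t pcid_next;	/* next PCID to hand out */
	uint32_t pcid_gen;	/* current generation, never 0 */
};

struct pmap_pcid_slot {		/* per-pmap, per-cpu slot */
	uint32_t pcid;
	uint32_t gen;		/* 0 means "invalidated" */
};

/*
 * Returns true when the previously allocated PCID is still valid, i.e.
 * the cached TLB entries may be kept and CR3_PCID_SAVE can be set.
 */
static bool
pcid_alloc(struct cpu_pcid_state *cpu, struct pmap_pcid_slot *pm)
{
	if (pm->gen == cpu->pcid_gen)
		return (true);		/* reuse; no TLB flush needed */
	if (cpu->pcid_next == PCID_OVERMAX) {
		/*
		 * PCIDs exhausted: bump the generation, which implicitly
		 * invalidates every pmap's cached PCID on this cpu.
		 */
		if (++cpu->pcid_gen == 0)
			cpu->pcid_gen = 1;
		cpu->pcid_next = PCID_KERN + 1;
	}
	pm->pcid = cpu->pcid_next++;
	pm->gen = cpu->pcid_gen;
	return (false);			/* fresh PCID; flush on %cr3 load */
}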

The pm_save mask no longer has to be tracked, which significantly
reduces the number of targets of the TLB shootdown IPIs.  Previously,
pm_save was reset only by pmap_invalidate_all(), so it accumulated the
cpuids of all processors on which the thread had been scheduled between
full TLB shootdowns.
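
For illustration, a hedged sketch of the invalidation side (reusing
struct pmap_pcid_slot from the sketch above; the committed logic lives in
pmap_invalidate_all() in the patch):

/*
 * Sketch: instead of IPI'ing every cpu ever recorded in pm_save, zero
 * the other cpus' generation slots so that their next context switch to
 * this pmap allocates a fresh PCID; the shootdown IPI itself (not shown)
 * only needs to reach the cpus currently in pm_active.
 */
static void
invalidate_all_sketch(struct pmap_pcid_slot slots[], unsigned int ncpu,
    unsigned int self)
{
	unsigned int i;

	for (i = 0; i < ncpu; i++)
		if (i != self)
			slots[i].gen = 0;	/* lazy remote invalidation */
}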

Besides reducing the number of TLB shootdowns and removing the atomics
that updated pm_save in the context switch code, the new algorithm is much
simpler than the maintenance of pm_save and the selection of the right
address space in the shootdown IPI handler.

Reviewed by:	alc
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	3 weeks
kib 2015-05-09 19:11:01 +00:00
parent 332806b946
commit 3fb738761e
13 changed files with 295 additions and 516 deletions


@ -202,14 +202,6 @@ invltlb_ret:
POP_FRAME
jmp doreti_iret
SUPERALIGN_TEXT
IDTVEC(invltlb_pcid)
PUSH_FRAME
call invltlb_pcid_handler
jmp invltlb_ret
SUPERALIGN_TEXT
IDTVEC(invltlb)
PUSH_FRAME
@ -217,16 +209,22 @@ IDTVEC(invltlb)
call invltlb_handler
jmp invltlb_ret
IDTVEC(invltlb_pcid)
PUSH_FRAME
call invltlb_pcid_handler
jmp invltlb_ret
IDTVEC(invltlb_invpcid)
PUSH_FRAME
call invltlb_invpcid_handler
jmp invltlb_ret
/*
* Single page TLB shootdown
*/
.text
SUPERALIGN_TEXT
IDTVEC(invlpg_pcid)
PUSH_FRAME
call invlpg_pcid_handler
jmp invltlb_ret
SUPERALIGN_TEXT
IDTVEC(invlpg)


@ -69,16 +69,10 @@
* %rsi = newtd
*/
ENTRY(cpu_throw)
movl PCPU(CPUID),%eax
testq %rdi,%rdi
jz 1f
/* release bit from old pm_active */
movq PCPU(CURPMAP),%rdx
LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */
1:
movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */
movq PCB_CR3(%r8),%rcx /* new address space */
jmp swact
movq %rsi,%r12
movq %rsi,%rdi
call pmap_activate_sw
jmp sw1
END(cpu_throw)
/*
@ -132,59 +126,20 @@ ctx_switch_xsave:
xorl %eax,%eax
movq %rax,PCPU(FPCURTHREAD)
3:
/* Save is done. Now fire up new thread. Leave old vmspace. */
movq TD_PCB(%rsi),%r8
/* switch address space */
movq PCB_CR3(%r8),%rcx
movq %cr3,%rax
cmpq %rcx,%rax /* Same address space? */
jne swinact
SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
jmp sw1
swinact:
movl PCPU(CPUID),%eax
/* Release bit from old pmap->pm_active */
movq PCPU(CURPMAP),%r12
LK btrl %eax,PM_ACTIVE(%r12) /* clear old */
SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */
swact:
/* Set bit in new pmap->pm_active */
movq TD_PROC(%rsi),%rdx /* newproc */
movq P_VMSPACE(%rdx), %rdx
addq $VM_PMAP,%rdx
cmpl $-1,PM_PCID(%rdx)
je 1f
LK btsl %eax,PM_SAVE(%rdx)
jnc 1f
btsq $63,%rcx /* CR3_PCID_SAVE */
incq PCPU(PM_SAVE_CNT)
1:
movq %rcx,%cr3 /* new address space */
LK btsl %eax,PM_ACTIVE(%rdx) /* set new */
movq %rdx,PCPU(CURPMAP)
/*
* We might lose the race and other CPU might have changed
* the pmap after we set our bit in pmap->pm_save. Recheck.
* Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was
* modified, causing TLB flush for this pcid.
*/
btrq $63,%rcx
jnc 1f
LK btsl %eax,PM_SAVE(%rdx)
jc 1f
decq PCPU(PM_SAVE_CNT)
movq %rcx,%cr3
1:
movq %rsi,%r12
movq %rdi,%r13
movq %rdx,%r15
movq %rsi,%rdi
callq pmap_activate_sw
SETLK %r15,TD_LOCK(%r13) /* Release the old thread */
sw1:
movq TD_PCB(%r12),%r8
#if defined(SCHED_ULE) && defined(SMP)
/* Wait for the new thread to become unblocked */
movq $blocked_lock, %rdx
1:
movq TD_LOCK(%rsi),%rcx
movq TD_LOCK(%r12),%rcx
cmpq %rcx, %rdx
pause
je 1b
@ -195,13 +150,13 @@ sw1:
*/
/* Skip loading user fsbase/gsbase for kthreads */
testl $TDP_KTHREAD,TD_PFLAGS(%rsi)
testl $TDP_KTHREAD,TD_PFLAGS(%r12)
jnz do_kthread
/*
* Load ldt register
*/
movq TD_PROC(%rsi),%rcx
movq TD_PROC(%r12),%rcx
cmpq $0, P_MD+MD_LDT(%rcx)
jne do_ldt
xorl %eax,%eax
@ -238,7 +193,7 @@ done_tss:
movq %r8,PCPU(CURPCB)
/* Update the TSS_RSP0 pointer for the next interrupt */
movq %r8,COMMON_TSS_RSP0(%rdx)
movq %rsi,PCPU(CURTHREAD) /* into next thread */
movq %r12,PCPU(CURTHREAD) /* into next thread */
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)


@ -71,8 +71,6 @@ __FBSDID("$FreeBSD$");
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
ASSYM(PM_SAVE, offsetof(struct pmap, pm_save));
ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid));
ASSYM(P_MD, offsetof(struct proc, p_md));
ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));


@ -1718,7 +1718,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */
thread0.td_frame = &proc0_tf;
env = kern_getenv("kernelname");


@ -88,12 +88,9 @@ char *doublefault_stack;
char *nmi_stack;
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr2;
struct invpcid_descr smp_tlb_invpcid;
static vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
static pmap_t smp_tlb_pmap;
volatile int smp_tlb_wait;
uint64_t pcid_cr3;
pmap_t smp_tlb_pmap;
extern int invpcid_works;
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
@ -139,14 +136,17 @@ cpu_mp_start(void)
/* Install an inter-CPU IPI for TLB invalidation */
if (pmap_pcid_enabled) {
setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
SEL_KPL, 0);
setidt(IPI_INVLPG, IDTVEC(invlpg_pcid), SDT_SYSIGT,
SEL_KPL, 0);
if (invpcid_works) {
setidt(IPI_INVLTLB, IDTVEC(invltlb_invpcid),
SDT_SYSIGT, SEL_KPL, 0);
} else {
setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT,
SEL_KPL, 0);
}
} else {
setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
}
setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0);
setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0);
/* Install an inter-CPU IPI for cache invalidation. */
@ -242,6 +242,9 @@ init_secondary(void)
pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
GUSERLDT_SEL];
pc->pc_curpmap = kernel_pmap;
pc->pc_pcid_gen = 1;
pc->pc_pcid_next = PMAP_PCID_KERN + 1;
/* Save the per-cpu pointer for use by the NMI handler. */
np->np_pcpu = (register_t) pc;
@ -407,35 +410,8 @@ start_ap(int apic_id)
}
/*
* Flush the TLB on all other CPU's
* Flush the TLB on other CPU's
*/
static void
smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1,
vm_offset_t addr2)
{
u_int ncpu;
ncpu = mp_ncpus - 1; /* does not shootdown self */
if (ncpu < 1)
return; /* no other cpus */
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
smp_tlb_invpcid.addr = addr1;
if (pmap == NULL) {
smp_tlb_invpcid.pcid = 0;
} else {
smp_tlb_invpcid.pcid = pmap->pm_pcid;
pcid_cr3 = pmap->pm_cr3;
}
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_ipi_mtx);
}
static void
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
@ -443,7 +419,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
{
int cpu, ncpu, othercpus;
othercpus = mp_ncpus - 1;
othercpus = mp_ncpus - 1; /* does not shootdown self */
/*
* Check for other cpus. Return if none.
*/
if (CPU_ISFULLSET(&mask)) {
if (othercpus < 1)
return;
@ -452,16 +432,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
if (CPU_EMPTY(&mask))
return;
}
if (!(read_rflags() & PSL_I))
panic("%s: interrupts disabled", __func__);
mtx_lock_spin(&smp_ipi_mtx);
smp_tlb_invpcid.addr = addr1;
if (pmap == NULL) {
smp_tlb_invpcid.pcid = 0;
} else {
smp_tlb_invpcid.pcid = pmap->pm_pcid;
pcid_cr3 = pmap->pm_cr3;
}
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
smp_tlb_pmap = pmap;
atomic_store_rel_int(&smp_tlb_wait, 0);
@ -485,11 +460,23 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
}
void
smp_invlpg(pmap_t pmap, vm_offset_t addr)
smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
{
if (smp_started) {
smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0);
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
}
void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
@ -497,11 +484,12 @@ smp_invlpg(pmap_t pmap, vm_offset_t addr)
}
void
smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2);
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL,
addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
@ -509,62 +497,13 @@ smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2)
}
}
void
smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
}
void
smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
}
void
smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
vm_offset_t addr2)
{
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1,
addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
}
void
smp_cache_flush(void)
{
if (smp_started)
smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0);
}
void
smp_invltlb(pmap_t pmap)
{
if (smp_started) {
smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL,
0, 0);
}
}
@ -586,10 +525,10 @@ invltlb_handler(void)
}
void
invltlb_pcid_handler(void)
invltlb_invpcid_handler(void)
{
uint64_t cr3;
u_int cpuid;
struct invpcid_descr d;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@ -597,28 +536,39 @@ invltlb_pcid_handler(void)
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
if (smp_tlb_invpcid.pcid != (uint64_t)-1 &&
smp_tlb_invpcid.pcid != 0) {
if (invpcid_works) {
invpcid(&smp_tlb_invpcid, INVPCID_CTX);
} else {
/* Otherwise reload %cr3 twice. */
cr3 = rcr3();
if (cr3 != pcid_cr3) {
load_cr3(pcid_cr3);
cr3 |= CR3_PCID_SAVE;
}
load_cr3(cr3);
}
} else {
invltlb_globpcid();
}
if (smp_tlb_pmap != NULL) {
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
CPU_CLR_ATOMIC(cpuid, &smp_tlb_pmap->pm_save);
}
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
INVPCID_CTX);
atomic_add_int(&smp_tlb_wait, 1);
}
void
invltlb_pcid_handler(void)
{
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
if (smp_tlb_pmap == kernel_pmap) {
invltlb_globpcid();
} else {
/*
* The current pmap might not be equal to
* smp_tlb_pmap. The clearing of the pm_gen in
* pmap_invalidate_all() takes care of TLB
* invalidation when switching to the pmap on this
* CPU.
*/
if (PCPU_GET(curpmap) == smp_tlb_pmap) {
load_cr3(smp_tlb_pmap->pm_cr3 |
smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
}
}
atomic_add_int(&smp_tlb_wait, 1);
}
@ -632,60 +582,15 @@ invlpg_handler(void)
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
invlpg(smp_tlb_invpcid.addr);
invlpg(smp_tlb_addr1);
atomic_add_int(&smp_tlb_wait, 1);
}
void
invlpg_pcid_handler(void)
{
uint64_t cr3;
#ifdef COUNT_XINVLTLB_HITS
xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
invltlb_globpcid();
} else if (smp_tlb_invpcid.pcid == 0) {
invlpg(smp_tlb_invpcid.addr);
} else if (invpcid_works) {
invpcid(&smp_tlb_invpcid, INVPCID_ADDR);
} else {
/*
* PCID supported, but INVPCID is not.
* Temporarily switch to the target address
* space and do INVLPG.
*/
cr3 = rcr3();
if (cr3 != pcid_cr3)
load_cr3(pcid_cr3 | CR3_PCID_SAVE);
invlpg(smp_tlb_invpcid.addr);
load_cr3(cr3 | CR3_PCID_SAVE);
}
atomic_add_int(&smp_tlb_wait, 1);
}
static inline void
invlpg_range(vm_offset_t start, vm_offset_t end)
{
do {
invlpg(start);
start += PAGE_SIZE;
} while (start < end);
}
void
invlrng_handler(void)
{
struct invpcid_descr d;
vm_offset_t addr;
uint64_t cr3;
u_int cpuid;
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@ -693,38 +598,11 @@ invlrng_handler(void)
(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
addr = smp_tlb_invpcid.addr;
if (pmap_pcid_enabled) {
if (smp_tlb_invpcid.pcid == 0) {
/*
* kernel pmap - use invlpg to invalidate
* global mapping.
*/
invlpg_range(addr, smp_tlb_addr2);
} else if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
invltlb_globpcid();
if (smp_tlb_pmap != NULL) {
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
CPU_CLR_ATOMIC(cpuid,
&smp_tlb_pmap->pm_save);
}
} else if (invpcid_works) {
d = smp_tlb_invpcid;
do {
invpcid(&d, INVPCID_ADDR);
d.addr += PAGE_SIZE;
} while (d.addr <= smp_tlb_addr2);
} else {
cr3 = rcr3();
if (cr3 != pcid_cr3)
load_cr3(pcid_cr3 | CR3_PCID_SAVE);
invlpg_range(addr, smp_tlb_addr2);
load_cr3(cr3 | CR3_PCID_SAVE);
}
} else {
invlpg_range(addr, smp_tlb_addr2);
}
addr = smp_tlb_addr1;
do {
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < smp_tlb_addr2);
atomic_add_int(&smp_tlb_wait, 1);
}


@ -273,6 +273,8 @@ pmap_modified_bit(pmap_t pmap)
return (mask);
}
extern struct pcpu __pcpu[];
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
@ -379,8 +381,6 @@ caddr_t CADDR1 = 0;
static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */
static struct unrhdr pcid_unr;
static struct mtx pcid_mtx;
int pmap_pcid_enabled = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
@ -827,6 +827,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
{
vm_offset_t va;
pt_entry_t *pte;
int i;
/*
* Create an initial set of page tables to run the kernel in.
@ -861,7 +862,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
kernel_pmap->pm_cr3 = KPML4phys;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
CPU_FILL(&kernel_pmap->pm_save); /* always superset of pm_active */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
kernel_pmap->pm_flags = pmap_flags;
@ -895,18 +895,28 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
/* Initialize TLB Context Id. */
TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled);
if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) {
load_cr4(rcr4() | CR4_PCIDE);
mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF);
init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx);
/* Check for INVPCID support */
invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID)
!= 0;
kernel_pmap->pm_pcid = 0;
#ifndef SMP
for (i = 0; i < MAXCPU; i++) {
kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN;
kernel_pmap->pm_pcids[i].pm_gen = 1;
}
__pcpu[0].pc_pcid_next = PMAP_PCID_KERN + 1;
__pcpu[0].pc_pcid_gen = 1;
/*
* pcpu area for APs is zeroed during AP startup.
* pc_pcid_next and pc_pcid_gen are initialized by AP
* during pcpu setup.
*/
#ifdef SMP
load_cr4(rcr4() | CR4_PCIDE);
#else
pmap_pcid_enabled = 0;
#endif
} else
} else {
pmap_pcid_enabled = 0;
}
}
/*
@ -1277,28 +1287,6 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
}
#ifdef SMP
static void
pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va)
{
struct invpcid_descr d;
uint64_t cr3;
if (invpcid_works) {
d.pcid = pmap->pm_pcid;
d.pad = 0;
d.addr = va;
invpcid(&d, INVPCID_ADDR);
return;
}
cr3 = rcr3();
critical_enter();
load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
invlpg(va);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
}
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
*
@ -1361,8 +1349,8 @@ pmap_invalidate_ept(pmap_t pmap)
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
cpuset_t other_cpus;
u_int cpuid;
cpuset_t *mask;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
@ -1373,74 +1361,33 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
("pmap_invalidate_page: invalid type %d", pmap->pm_type));
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
if (!pmap_pcid_enabled) {
invlpg(va);
} else {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
if (pmap == PCPU_GET(curpmap))
invlpg(va);
else
pmap_invalidate_page_pcid(pmap, va);
} else {
invltlb_globpcid();
}
}
smp_invlpg(pmap, va);
if (pmap == kernel_pmap) {
invlpg(va);
mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
if (CPU_ISSET(cpuid, &pmap->pm_active))
if (pmap == PCPU_GET(curpmap))
invlpg(va);
else if (pmap_pcid_enabled) {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
pmap_invalidate_page_pcid(pmap, va);
else
invltlb_globpcid();
else if (pmap_pcid_enabled)
pmap->pm_pcids[cpuid].pm_gen = 0;
if (pmap_pcid_enabled) {
CPU_FOREACH(i) {
if (cpuid != i)
pmap->pm_pcids[i].pm_gen = 0;
}
}
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
smp_masked_invlpg(other_cpus, pmap, va);
mask = &pmap->pm_active;
}
smp_masked_invlpg(*mask, va);
sched_unpin();
}
static void
pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
struct invpcid_descr d;
uint64_t cr3;
vm_offset_t addr;
if (invpcid_works) {
d.pcid = pmap->pm_pcid;
d.pad = 0;
for (addr = sva; addr < eva; addr += PAGE_SIZE) {
d.addr = addr;
invpcid(&d, INVPCID_ADDR);
}
return;
}
cr3 = rcr3();
critical_enter();
load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE);
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
cpuset_t other_cpus;
cpuset_t *mask;
vm_offset_t addr;
u_int cpuid;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
@ -1451,55 +1398,36 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
("pmap_invalidate_range: invalid type %d", pmap->pm_type));
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
if (!pmap_pcid_enabled) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
} else {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva;
addr += PAGE_SIZE)
invlpg(addr);
} else {
pmap_invalidate_range_pcid(pmap,
sva, eva);
}
} else {
invltlb_globpcid();
}
}
smp_invlpg_range(pmap, sva, eva);
cpuid = PCPU_GET(cpuid);
if (pmap == kernel_pmap) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
mask = &all_cpus;
} else {
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
if (CPU_ISSET(cpuid, &pmap->pm_active)) {
if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
} else if (pmap_pcid_enabled) {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0)
pmap_invalidate_range_pcid(pmap, sva, eva);
else
invltlb_globpcid();
pmap->pm_pcids[cpuid].pm_gen = 0;
}
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
smp_masked_invlpg_range(other_cpus, pmap, sva, eva);
if (pmap_pcid_enabled) {
CPU_FOREACH(i) {
if (cpuid != i)
pmap->pm_pcids[i].pm_gen = 0;
}
}
mask = &pmap->pm_active;
}
smp_masked_invlpg_range(*mask, sva, eva);
sched_unpin();
}
void
pmap_invalidate_all(pmap_t pmap)
{
cpuset_t other_cpus;
cpuset_t *mask;
struct invpcid_descr d;
uint64_t cr3;
u_int cpuid;
u_int cpuid, i;
if (pmap_type_guest(pmap)) {
pmap_invalidate_ept(pmap);
@ -1510,60 +1438,42 @@ pmap_invalidate_all(pmap_t pmap)
("pmap_invalidate_all: invalid type %d", pmap->pm_type));
sched_pin();
cpuid = PCPU_GET(cpuid);
if (pmap == kernel_pmap ||
(pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) ||
!CPU_CMP(&pmap->pm_active, &all_cpus)) {
if (invpcid_works) {
if (pmap == kernel_pmap) {
if (pmap_pcid_enabled && invpcid_works) {
bzero(&d, sizeof(d));
invpcid(&d, INVPCID_CTXGLOB);
} else {
invltlb_globpcid();
}
if (!CPU_ISSET(cpuid, &pmap->pm_active))
CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
smp_invltlb(pmap);
mask = &all_cpus;
} else {
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
/*
* This logic is duplicated in the Xinvltlb shootdown
* IPI handler.
*/
if (pmap_pcid_enabled) {
if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) {
cpuid = PCPU_GET(cpuid);
if (pmap == PCPU_GET(curpmap)) {
if (pmap_pcid_enabled) {
if (invpcid_works) {
d.pcid = pmap->pm_pcid;
d.pcid = pmap->pm_pcids[cpuid].pm_pcid;
d.pad = 0;
d.addr = 0;
invpcid(&d, INVPCID_CTX);
} else {
cr3 = rcr3();
critical_enter();
/*
* Bit 63 is clear, pcid TLB
* entries are invalidated.
*/
load_cr3(pmap->pm_cr3);
load_cr3(cr3 | CR3_PCID_SAVE);
critical_exit();
load_cr3(pmap->pm_cr3 | pmap->pm_pcids
[PCPU_GET(cpuid)].pm_pcid);
}
} else {
invltlb_globpcid();
invltlb();
}
} else if (CPU_ISSET(cpuid, &pmap->pm_active))
invltlb();
if (!CPU_ISSET(cpuid, &pmap->pm_active))
CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
CPU_AND(&other_cpus, &pmap->pm_active);
if (!CPU_EMPTY(&other_cpus))
smp_masked_invltlb(other_cpus, pmap);
} else if (pmap_pcid_enabled) {
pmap->pm_pcids[cpuid].pm_gen = 0;
}
if (pmap_pcid_enabled) {
CPU_FOREACH(i) {
if (cpuid != i)
pmap->pm_pcids[i].pm_gen = 0;
}
}
mask = &pmap->pm_active;
}
smp_masked_invltlb(*mask, pmap);
sched_unpin();
}
@ -1627,7 +1537,6 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
active = all_cpus;
else {
active = pmap->pm_active;
CPU_AND_ATOMIC(&pmap->pm_save, &active);
}
if (CPU_OVERLAP(&active, &other_cpus)) {
act.store = cpuid;
@ -2205,11 +2114,9 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_cr3 = KPML4phys;
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
CPU_ZERO(&pmap->pm_save);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
pmap->pm_flags = pmap_flags;
}
@ -2233,7 +2140,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
pml4phys = VM_PAGE_TO_PHYS(pml4pg);
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
pmap->pm_pcid = -1;
CPU_FOREACH(i) {
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
pmap->pm_pcids[i].pm_gen = 0;
}
pmap->pm_cr3 = ~0; /* initialize to an invalid value */
if ((pml4pg->flags & PG_ZERO) == 0)
@ -2260,12 +2170,6 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
/* install self-referential address mapping entry(s) */
pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) |
X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
if (pmap_pcid_enabled) {
pmap->pm_pcid = alloc_unr(&pcid_unr);
if (pmap->pm_pcid != -1)
pmap->pm_cr3 |= pmap->pm_pcid;
}
}
pmap->pm_root.rt_root = 0;
@ -2274,7 +2178,6 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_flags = flags;
pmap->pm_eptgen = 0;
CPU_ZERO(&pmap->pm_save);
return (1);
}
@ -2535,14 +2438,6 @@ pmap_release(pmap_t pmap)
KASSERT(CPU_EMPTY(&pmap->pm_active),
("releasing active pmap %p", pmap));
if (pmap_pcid_enabled) {
/*
* Invalidate any left TLB entries, to allow the reuse
* of the pcid.
*/
pmap_invalidate_all(pmap);
}
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4));
for (i = 0; i < NKPML4E; i++) /* KVA */
@ -2554,8 +2449,6 @@ pmap_release(pmap_t pmap)
m->wire_count--;
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_page_free_zero(m);
if (pmap->pm_pcid != -1)
free_unr(&pcid_unr, pmap->pm_pcid);
}
static int
@ -6657,28 +6550,84 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
return (val);
}
static uint64_t
pmap_pcid_alloc(pmap_t pmap, u_int cpuid)
{
uint32_t gen, new_gen, pcid_next;
CRITICAL_ASSERT(curthread);
gen = PCPU_GET(pcid_gen);
if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN ||
pmap->pm_pcids[cpuid].pm_gen == gen)
return (CR3_PCID_SAVE);
pcid_next = PCPU_GET(pcid_next);
KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x",
cpuid, pcid_next));
if (pcid_next == PMAP_PCID_OVERMAX) {
new_gen = gen + 1;
if (new_gen == 0)
new_gen = 1;
PCPU_SET(pcid_gen, new_gen);
pcid_next = PMAP_PCID_KERN + 1;
} else {
new_gen = gen;
}
pmap->pm_pcids[cpuid].pm_pcid = pcid_next;
pmap->pm_pcids[cpuid].pm_gen = new_gen;
PCPU_SET(pcid_next, pcid_next + 1);
return (0);
}
void
pmap_activate_sw(struct thread *td)
{
pmap_t oldpmap, pmap;
uint64_t cached, cr3;
u_int cpuid;
oldpmap = PCPU_GET(curpmap);
pmap = vmspace_pmap(td->td_proc->p_vmspace);
if (oldpmap == pmap)
return;
cpuid = PCPU_GET(cpuid);
#ifdef SMP
CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
CPU_SET(cpuid, &pmap->pm_active);
#endif
cr3 = rcr3();
if (pmap_pcid_enabled) {
cached = pmap_pcid_alloc(pmap, cpuid);
KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 &&
pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX,
("pmap %p cpu %d pcid %#x", pmap, cpuid,
pmap->pm_pcids[cpuid].pm_pcid));
KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN || pmap == kernel_pmap,
("non-kernel pmap %p cpu %d pcid %#x", pmap, cpuid,
pmap->pm_pcids[cpuid].pm_pcid));
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) {
load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
cached);
if (cached)
PCPU_INC(pm_save_cnt);
}
} else if (cr3 != pmap->pm_cr3) {
load_cr3(pmap->pm_cr3);
}
PCPU_SET(curpmap, pmap);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
#else
CPU_CLR(cpuid, &oldpmap->pm_active);
#endif
}
void
pmap_activate(struct thread *td)
{
pmap_t pmap, oldpmap;
u_int cpuid;
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
cpuid = PCPU_GET(cpuid);
#ifdef SMP
CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
CPU_SET_ATOMIC(cpuid, &pmap->pm_save);
#else
CPU_CLR(cpuid, &oldpmap->pm_active);
CPU_SET(cpuid, &pmap->pm_active);
CPU_SET(cpuid, &pmap->pm_save);
#endif
td->td_pcb->pcb_cr3 = pmap->pm_cr3;
load_cr3(pmap->pm_cr3);
PCPU_SET(curpmap, pmap);
pmap_activate_sw(td);
critical_exit();
}


@ -219,7 +219,6 @@ cpu_fork(td1, p2, td2, flags)
* return address on stack. These are the kernel mode register values.
*/
pmap2 = vmspace_pmap(p2->p_vmspace);
pcb2->pcb_cr3 = pmap2->pm_cr3;
pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
pcb2->pcb_rbp = 0;
pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
@ -477,7 +476,6 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
pcb2->pcb_rip = (register_t)fork_trampoline;
/*
* If we didn't copy the pcb, we'd need to do the following registers:
* pcb2->pcb_cr3: cloned above.
* pcb2->pcb_dr*: cloned above.
* pcb2->pcb_savefpu: cloned above.
* pcb2->pcb_onfault: cloned above (always NULL here?).


@ -540,9 +540,8 @@ static __inline void
invpcid(struct invpcid_descr *d, int type)
{
/* invpcid (%rdx),%rax */
__asm __volatile(".byte 0x66,0x0f,0x38,0x82,0x02"
: : "d" (d), "a" ((u_long)type) : "memory");
__asm __volatile("invpcid (%0),%1"
: : "r" (d), "r" ((u_long)type) : "memory");
}
static __inline u_short


@ -63,7 +63,9 @@
uint64_t pc_dbreg[16]; /* ddb debugging regs */ \
int pc_dbreg_cmd; /* ddb debugging reg cmd */ \
u_int pc_vcpu_id; /* Xen vCPU ID */ \
char __pad[157] /* be divisor of PAGE_SIZE \
uint32_t pc_pcid_next; \
uint32_t pc_pcid_gen; \
char __pad[149] /* be divisor of PAGE_SIZE \
after cache alignment */
#define PC_DBREG_CMD_NONE 0


@ -219,6 +219,10 @@
#define ISA_HOLE_START 0xa0000
#define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START)
#define PMAP_PCID_NONE 0xffffffff
#define PMAP_PCID_KERN 0
#define PMAP_PCID_OVERMAX 0x1000
#ifndef LOCORE
#include <sys/queue.h>
@ -292,6 +296,11 @@ enum pmap_type {
PT_RVI, /* AMD's nested page tables */
};
struct pmap_pcids {
uint32_t pm_pcid;
uint32_t pm_gen;
};
/*
* The kernel virtual address (KVA) of the level 4 page table page is always
* within the direct map (DMAP) region.
@ -302,13 +311,12 @@ struct pmap {
uint64_t pm_cr3;
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
cpuset_t pm_save; /* Context valid on cpus mask */
int pm_pcid; /* context id */
enum pmap_type pm_type; /* regular or nested tables */
struct pmap_statistics pm_stats; /* pmap statistics */
struct vm_radix pm_root; /* spare page table pages */
long pm_eptgen; /* EPT pmap generation id */
int pm_flags;
struct pmap_pcids pm_pcids[MAXCPU];
};
/* flags */
@ -375,6 +383,9 @@ extern vm_paddr_t dmaplimit;
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
struct thread;
void pmap_activate_sw(struct thread *);
void pmap_bootstrap(vm_paddr_t *);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate);


@ -46,6 +46,7 @@ extern struct mtx ap_boot_mtx;
extern int cpu_logical;
extern int cpu_cores;
extern int pmap_pcid_enabled;
extern int invpcid_works;
extern u_int xhits_gbl[];
extern u_int xhits_pg[];
extern u_int xhits_rng[];
@ -53,10 +54,6 @@ extern u_int ipi_global;
extern u_int ipi_page;
extern u_int ipi_range;
extern u_int ipi_range_size;
extern u_int ipi_masked_global;
extern u_int ipi_masked_page;
extern u_int ipi_masked_range;
extern u_int ipi_masked_range_size;
extern volatile int smp_tlb_wait;
@ -78,9 +75,9 @@ extern u_long *ipi_rendezvous_counts[MAXCPU];
/* IPI handlers */
inthand_t
IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid enabled */
IDTVEC(invltlb), /* TLB shootdowns - global */
IDTVEC(invlpg_pcid), /* TLB shootdowns - 1 page, pcid enabled */
IDTVEC(invltlb_pcid), /* TLB shootdowns - global, pcid */
IDTVEC(invltlb_invpcid),/* TLB shootdowns - global, invpcid */
IDTVEC(invlpg), /* TLB shootdowns - 1 page */
IDTVEC(invlrng), /* TLB shootdowns - page range */
IDTVEC(invlcache), /* Write back and invalidate cache */
@ -100,8 +97,8 @@ void cpususpend_handler(void);
void init_secondary_tail(void);
void invltlb_handler(void);
void invltlb_pcid_handler(void);
void invltlb_invpcid_handler(void);
void invlpg_handler(void);
void invlpg_pcid_handler(void);
void invlrng_handler(void);
void invlcache_handler(void);
void init_secondary(void);
@ -114,13 +111,9 @@ void ipi_selected(cpuset_t cpus, u_int ipi);
u_int mp_bootaddress(u_int);
void set_interrupt_apic_ids(void);
void smp_cache_flush(void);
void smp_invlpg(struct pmap *pmap, vm_offset_t addr);
void smp_masked_invlpg(cpuset_t mask, struct pmap *pmap, vm_offset_t addr);
void smp_invlpg_range(struct pmap *pmap, vm_offset_t startva,
void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
vm_offset_t endva);
void smp_masked_invlpg_range(cpuset_t mask, struct pmap *pmap,
vm_offset_t startva, vm_offset_t endva);
void smp_invltlb(struct pmap *pmap);
void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
int native_start_all_aps(void);
void mem_range_AP_init(void);


@ -53,6 +53,7 @@
#define CR0_CD 0x40000000 /* Cache Disable */
#define CR3_PCID_SAVE 0x8000000000000000
#define CR3_PCID_MASK 0xfff
/*
* Bits in PPro special registers


@ -422,6 +422,14 @@ xen_invltlb(void *arg)
}
#ifdef __amd64__
static int
xen_invltlb_invpcid(void *arg)
{
invltlb_invpcid_handler();
return (FILTER_HANDLED);
}
static int
xen_invltlb_pcid(void *arg)
{
@ -439,16 +447,6 @@ xen_invlpg(void *arg)
return (FILTER_HANDLED);
}
#ifdef __amd64__
static int
xen_invlpg_pcid(void *arg)
{
invlpg_pcid_handler();
return (FILTER_HANDLED);
}
#endif
static int
xen_invlrng(void *arg)
{
@ -532,8 +530,8 @@ xen_setup_cpus(void)
#ifdef __amd64__
if (pmap_pcid_enabled) {
xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid;
xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid;
xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = invpcid_works ?
xen_invltlb_invpcid : xen_invltlb_pcid;
}
#endif
CPU_FOREACH(i)