From 965cc255c98f446e1d35d125f184c130a092146b Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Fri, 28 Mar 2014 16:07:27 +0000 Subject: [PATCH] Several fixes for the PCID implementation: - When clearing a bit for a cpuid in pmap->pm_save, ensure that the cpuid is not set in pm_active. The pm_save indicates which CPUs may have cached translations for given PCID, which implies that a CPU executing with the given pmap active have the translations cached. [1] - In smp_masked_invltlb(), pass pmap to smp_targeted_tlb_shootdown(). [1] - In invlrng_handler(), check for the special values of pcid (0 and -1) and do corresponding global or total invalidations before checking for performing PCID-specific range invalidation with INVPCID_ADDR. [2] - In invltlb_pcid_handler(), do not read %cr3 unless needed. [2] - Do minor style tweaks. [2] Submitted by: Henrik Gulbrandsen [1] Other parts sponsored by: The FreeBSD Foundation [2] Tested by: Henrik Gulbrandsen, pho MFC after: 1 week --- sys/amd64/amd64/mp_machdep.c | 40 ++++++++++++++++++++---------------- sys/amd64/amd64/pmap.c | 8 +++++--- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 80b4e1298a8d..afc1bef7a7dc 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1257,7 +1257,7 @@ smp_masked_invltlb(cpuset_t mask, pmap_t pmap) { if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, NULL, 0, 0); + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_global++; #endif @@ -1517,6 +1517,7 @@ void invltlb_pcid_handler(void) { uint64_t cr3; + u_int cpuid; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ @@ -1524,14 +1525,13 @@ invltlb_pcid_handler(void) (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - cr3 = rcr3(); if (smp_tlb_invpcid.pcid != (uint64_t)-1 && smp_tlb_invpcid.pcid != 0) { - if (invpcid_works) { invpcid(&smp_tlb_invpcid, INVPCID_CTX); } else { /* Otherwise reload %cr3 twice. */ + cr3 = rcr3(); if (cr3 != pcid_cr3) { load_cr3(pcid_cr3); cr3 |= CR3_PCID_SAVE; @@ -1541,8 +1541,11 @@ invltlb_pcid_handler(void) } else { invltlb_globpcid(); } - if (smp_tlb_pmap != NULL) - CPU_CLR_ATOMIC(PCPU_GET(cpuid), &smp_tlb_pmap->pm_save); + if (smp_tlb_pmap != NULL) { + cpuid = PCPU_GET(cpuid); + if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active)) + CPU_CLR_ATOMIC(cpuid, &smp_tlb_pmap->pm_save); + } atomic_add_int(&smp_tlb_wait, 1); } @@ -1608,7 +1611,10 @@ invlpg_range(vm_offset_t start, vm_offset_t end) void invlrng_handler(void) { + struct invpcid_descr d; vm_offset_t addr; + uint64_t cr3; + u_int cpuid; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ @@ -1618,15 +1624,7 @@ invlrng_handler(void) addr = smp_tlb_invpcid.addr; if (pmap_pcid_enabled) { - if (invpcid_works) { - struct invpcid_descr d; - - d = smp_tlb_invpcid; - do { - invpcid(&d, INVPCID_ADDR); - d.addr += PAGE_SIZE; - } while (d.addr < smp_tlb_addr2); - } else if (smp_tlb_invpcid.pcid == 0) { + if (smp_tlb_invpcid.pcid == 0) { /* * kernel pmap - use invlpg to invalidate * global mapping. @@ -1635,12 +1633,18 @@ invlrng_handler(void) } else if (smp_tlb_invpcid.pcid == (uint64_t)-1) { invltlb_globpcid(); if (smp_tlb_pmap != NULL) { - CPU_CLR_ATOMIC(PCPU_GET(cpuid), - &smp_tlb_pmap->pm_save); + cpuid = PCPU_GET(cpuid); + if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active)) + CPU_CLR_ATOMIC(cpuid, + &smp_tlb_pmap->pm_save); } + } else if (invpcid_works) { + d = smp_tlb_invpcid; + do { + invpcid(&d, INVPCID_ADDR); + d.addr += PAGE_SIZE; + } while (d.addr <= smp_tlb_addr2); } else { - uint64_t cr3; - cr3 = rcr3(); if (cr3 != pcid_cr3) load_cr3(pcid_cr3 | CR3_PCID_SAVE); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 1f5070fb894c..84962d81439d 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -838,7 +838,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ - CPU_ZERO(&kernel_pmap->pm_save); + CPU_FILL(&kernel_pmap->pm_save); /* always superset of pm_active */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; @@ -1494,7 +1494,8 @@ pmap_invalidate_all(pmap_t pmap) } else { invltlb_globpcid(); } - CPU_CLR_ATOMIC(cpuid, &pmap->pm_save); + if (!CPU_ISSET(cpuid, &pmap->pm_active)) + CPU_CLR_ATOMIC(cpuid, &pmap->pm_save); smp_invltlb(pmap); } else { other_cpus = all_cpus; @@ -1528,7 +1529,8 @@ pmap_invalidate_all(pmap_t pmap) } } else if (CPU_ISSET(cpuid, &pmap->pm_active)) invltlb(); - CPU_CLR_ATOMIC(cpuid, &pmap->pm_save); + if (!CPU_ISSET(cpuid, &pmap->pm_active)) + CPU_CLR_ATOMIC(cpuid, &pmap->pm_save); if (pmap_pcid_enabled) CPU_AND(&other_cpus, &pmap->pm_save); else