From d5f2c1e4fc67887dc2b3935cffa72835a8d89647 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 26 Feb 2019 09:45:44 +0000 Subject: [PATCH] i386 PAE: avoid atomic for pte_store() where possible. Instead carefully write upper word, and only then the lower word with PG_V, for previously invalid ptes. It provides some measurable system time saving on buildworld. Reviewed by: markj Tested by: pho Measured by: bde (early version) Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D19226 --- sys/i386/i386/pmap.c | 4 ++-- sys/i386/include/pmap_nopae.h | 1 + sys/i386/include/pmap_pae.h | 10 ++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index a77605efa44b..53115d1549a7 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -3801,7 +3801,7 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m, if ((origpte & PG_A) != 0) pmap_invalidate_page_int(pmap, va); } else - pte_store(pte, newpte); + pte_store_zero(pte, newpte); unchanged: @@ -4104,7 +4104,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, #endif if (pmap != kernel_pmap) newpte |= PG_U; - pte_store(pte, newpte); + pte_store_zero(pte, newpte); sched_unpin(); return (mpte); } diff --git a/sys/i386/include/pmap_nopae.h b/sys/i386/include/pmap_nopae.h index d35872fcdf6c..e9d30cf54439 100644 --- a/sys/i386/include/pmap_nopae.h +++ b/sys/i386/include/pmap_nopae.h @@ -86,6 +86,7 @@ typedef uint32_t pdpt_entry_t; /* Only to keep struct pmap layout. 
*/ #define pte_store(ptep, pte) do { \ *(u_int *)(ptep) = (u_int)(pte); \ } while (0) +#define pte_store_zero(ptep, pte) pte_store(ptep, pte) #define pte_load(ptep) atomic_load_int(ptep) extern pt_entry_t PTmap[]; diff --git a/sys/i386/include/pmap_pae.h b/sys/i386/include/pmap_pae.h index 5bf229ca900e..e9394b444ff6 100644 --- a/sys/i386/include/pmap_pae.h +++ b/sys/i386/include/pmap_pae.h @@ -101,6 +101,16 @@ typedef uint64_t pt_entry_t; #define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) #define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) #define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) +#define pte_store_zero(ptep, pte) \ +do { \ + uint32_t *p; \ + \ + MPASS((*ptep & PG_V) == 0); \ + p = (void *)ptep; \ + *(p + 1) = (uint32_t)(pte >> 32); \ + __compiler_membar(); \ + *p = (uint32_t)pte; \ +} while (0) #define pte_load(ptep) atomic_load_acq_64_i586(ptep) extern pdpt_entry_t *IdlePDPT;