i386 PAE: avoid atomic for pte_store() where possible.

Instead, carefully write the upper word first, and only then the lower
word with PG_V, for previously invalid ptes.  This provides some
measurable system time savings on buildworld.

Reviewed by:	markj
Tested by:	pho
Measured by:	bde (early version)
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D19226
This commit is contained in:
Konstantin Belousov 2019-02-26 09:45:44 +00:00
parent 9e018513d0
commit d5f2c1e4fc
3 changed files with 13 additions and 2 deletions

View File

@ -3801,7 +3801,7 @@ __CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m,
if ((origpte & PG_A) != 0)
pmap_invalidate_page_int(pmap, va);
} else
pte_store(pte, newpte);
pte_store_zero(pte, newpte);
unchanged:
@ -4104,7 +4104,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
#endif
if (pmap != kernel_pmap)
newpte |= PG_U;
pte_store(pte, newpte);
pte_store_zero(pte, newpte);
sched_unpin();
return (mpte);
}

View File

@ -86,6 +86,7 @@ typedef uint32_t pdpt_entry_t; /* Only to keep struct pmap layout. */
#define pte_store(ptep, pte) do { \
*(u_int *)(ptep) = (u_int)(pte); \
} while (0)
#define pte_store_zero(ptep, pte) pte_store(ptep, pte)
#define pte_load(ptep) atomic_load_int(ptep)
extern pt_entry_t PTmap[];

View File

@ -101,6 +101,16 @@ typedef uint64_t pt_entry_t;
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0)
#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte)
/*
 * Store a 64-bit PTE value into *ptep with two plain 32-bit writes
 * instead of an atomic 64-bit store.  Only valid when the slot does not
 * currently have PG_V set; this is asserted with MPASS().
 *
 * The upper 32 bits are written first and the lower 32 bits last, with
 * a compiler barrier in between so the compiler cannot reorder the two
 * stores.  Every macro argument is parenthesized so that compound
 * expressions (e.g. "a | b") expand correctly, and the scratch pointer
 * uses an underscore-prefixed name to avoid capturing caller variables.
 *
 * Note: both ptep and pte are evaluated more than once; do not pass
 * expressions with side effects.
 */
#define	pte_store_zero(ptep, pte)				\
do {								\
	uint32_t *_pte_words;					\
								\
	MPASS((*(ptep) & PG_V) == 0);				\
	_pte_words = (void *)(ptep);				\
	/* Upper word first; the entry is still invalid. */	\
	*(_pte_words + 1) = (uint32_t)((pte) >> 32);		\
	__compiler_membar();					\
	/* Lower word last, after the barrier. */		\
	*_pte_words = (uint32_t)(pte);				\
} while (0)
#define pte_load(ptep) atomic_load_acq_64_i586(ptep)
extern pdpt_entry_t *IdlePDPT;