Convert x86 cache invalidation functions to ifuncs.

This simplifies the runtime logic and reduces the number of
runtime-constant branches.

Reviewed by:	alc, markj
Sponsored by:	The FreeBSD Foundation
Approved by:	re (gjb)
Differential revision:	https://reviews.freebsd.org/D16736
Konstantin Belousov 2018-09-19 19:35:02 +00:00
parent 1aed6d48a8
commit d12c446550
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=338807
8 changed files with 145 additions and 93 deletions
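
The heart of the change is an ELF ifunc: instead of re-testing CPU features (and a per-call "force" flag) on every invocation, a resolver runs once at boot, inspects the CPU, and binds pmap_invalidate_cache_range to the best implementation. Condensed from the amd64 pmap.c hunks below, the pattern looks like this:

DEFINE_IFUNC(, void, pmap_invalidate_cache_range,
    (vm_offset_t sva, vm_offset_t eva), static)
{
	/* Self-snooping CPUs keep caches coherent; only assert alignment. */
	if ((cpu_feature & CPUID_SS) != 0)
		return (pmap_invalidate_cache_range_selfsnoop);
	/* CLFLUSH available: flush the range one cache line at a time. */
	if ((cpu_feature & CPUID_CLFSH) != 0)
		return (pmap_force_invalidate_cache_range);
	/* No targeted flush instruction: fall back to a global cache flush. */
	return (pmap_invalidate_cache_range_all);
}

The function the resolver returns is what every later call jumps to directly, so the feature checks disappear from the hot path. Callers that must always push lines back to memory (non-snooping DMA, for instance) now call pmap_force_invalidate_cache_range() explicitly instead of passing force=TRUE.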


@@ -648,6 +648,10 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
@@ -2171,36 +2175,62 @@ pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
pmap_invalidate_page(pmap, va);
}
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
DEFINE_IFUNC(, void, pmap_invalidate_cache_range,
(vm_offset_t sva, vm_offset_t eva), static)
{
if (force) {
sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
} else {
KASSERT((sva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: sva not page-aligned"));
KASSERT((eva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: eva not page-aligned"));
if ((cpu_feature & CPUID_SS) != 0)
return (pmap_invalidate_cache_range_selfsnoop);
if ((cpu_feature & CPUID_CLFSH) != 0)
return (pmap_force_invalidate_cache_range);
return (pmap_invalidate_cache_range_all);
}
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
static void
pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva)
{
KASSERT((sva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: sva not page-aligned"));
KASSERT((eva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: eva not page-aligned"));
}
static void
pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
{
pmap_invalidate_cache_range_check_align(sva, eva);
}
void
pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{
sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
/*
* The supplied range is bigger than 2MB.
* Globally invalidate cache.
*/
pmap_invalidate_cache();
return;
}
if ((cpu_feature & CPUID_SS) != 0 && !force)
; /* If "Self Snoop" is supported and allowed, do nothing. */
else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
eva - sva < PMAP_CLFLUSH_THRESHOLD) {
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
* registers if we use CLFLUSH on the local APIC
* range. The local APIC is always uncached, so we
* don't need to flush for that range anyway.
*/
if (pmap_kextract(sva) == lapic_paddr)
return;
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
* registers if we use CLFLUSH on the local APIC range. The
* local APIC is always uncached, so we don't need to flush
* for that range anyway.
*/
if (pmap_kextract(sva) == lapic_paddr)
return;
if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
/*
* Otherwise, do per-cache line flush. Use the sfence
* Do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
@@ -2210,10 +2240,7 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
for (; sva < eva; sva += cpu_clflush_line_size)
clflushopt(sva);
sfence();
} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
eva - sva < PMAP_CLFLUSH_THRESHOLD) {
if (pmap_kextract(sva) == lapic_paddr)
return;
} else {
/*
* Writes are ordered by CLFLUSH on Intel CPUs.
*/
@@ -2223,17 +2250,17 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
clflush(sva);
if (cpu_vendor_id != CPU_VENDOR_INTEL)
mfence();
} else {
/*
* No targeted cache flush methods are supported by CPU,
* or the supplied range is bigger than 2MB.
* Globally invalidate cache.
*/
pmap_invalidate_cache();
}
}
static void
pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
{
pmap_invalidate_cache_range_check_align(sva, eva);
pmap_invalidate_cache();
}
/*
* Remove the specified set of pages from the data and instruction caches.
*
@@ -6938,7 +6965,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
pmap_invalidate_cache_range(va, va + tmpsize, FALSE);
pmap_invalidate_cache_range(va, va + tmpsize);
return ((void *)(va + offset));
}
@@ -7297,7 +7324,7 @@ pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
pmap_invalidate_cache_range(base, tmpva, FALSE);
pmap_invalidate_cache_range(base, tmpva);
}
return (error);
}
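
Assembled from the hunks above, the amd64 forced-flush routine reads approximately as follows after this change (comments abbreviated):

void
pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{
	sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
	if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
		/* Range larger than 2MB: globally invalidate the cache. */
		pmap_invalidate_cache();
		return;
	}

	/*
	 * Some CPUs fault, hang, or trash the local APIC registers if
	 * CLFLUSH is used on the local APIC range; it is always
	 * uncached, so skip it.
	 */
	if (pmap_kextract(sva) == lapic_paddr)
		return;

	if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
		/* CLFLUSHOPT is weakly ordered: fence around the loop. */
		sfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflushopt(sva);
		sfence();
	} else {
		/* CLFLUSH is ordered with stores on Intel; fence elsewhere. */
		if (cpu_vendor_id != CPU_VENDOR_INTEL)
			mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		if (cpu_vendor_id != CPU_VENDOR_INTEL)
			mfence();
	}
}

The plain pmap_invalidate_cache_range() ifunc only dispatches here on CPUs that lack self-snoop but have CLFLUSH; self-snooping CPUs get the assert-only stub, and CPUs without CLFLUSH get the global pmap_invalidate_cache() fallback.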


@@ -431,8 +431,8 @@ void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
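
For callers the conversion is mechanical: the boolean force argument is gone. Code invalidating page-aligned kernel mappings keeps calling pmap_invalidate_cache_range(), which can now resolve to a no-op on self-snooping CPUs, while former force=TRUE callers switch to pmap_force_invalidate_cache_range(). A sketch of the two call patterns (va, size, buf and len are placeholder names):

/* Page-aligned mapping or attribute change: ordinary invalidation. */
pmap_invalidate_cache_range(va, va + size);

/* Data that a non-snooping device will read: force the write-back. */
pmap_force_invalidate_cache_range((vm_offset_t)buf, (vm_offset_t)buf + len);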


@@ -395,8 +395,8 @@ drm_clflush_virt_range(char *addr, unsigned long length)
{
#if defined(__i386__) || defined(__amd64__)
pmap_invalidate_cache_range((vm_offset_t)addr,
(vm_offset_t)addr + length, TRUE);
pmap_force_invalidate_cache_range((vm_offset_t)addr,
(vm_offset_t)addr + length);
#else
DRM_ERROR("drm_clflush_virt_range not implemented on this architecture");
#endif


@@ -471,8 +471,8 @@ init_pipe_control(struct intel_ring_buffer *ring)
if (pc->cpu_page == NULL)
goto err_unpin;
pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
(vm_offset_t)pc->cpu_page + PAGE_SIZE, FALSE);
pmap_force_invalidate_cache_range((vm_offset_t)pc->cpu_page,
(vm_offset_t)pc->cpu_page + PAGE_SIZE);
pc->obj = obj;
ring->private = pc;
@@ -1102,8 +1102,9 @@ static int init_status_page(struct intel_ring_buffer *ring)
}
pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1);
pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
(vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, FALSE);
pmap_force_invalidate_cache_range(
(vm_offset_t)ring->status_page.page_addr,
(vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
ring->status_page.obj = obj;
memset(ring->status_page.page_addr, 0, PAGE_SIZE);


@@ -148,6 +148,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#endif
#include <x86/ifunc.h>
#include <machine/bootinfo.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
@@ -314,6 +315,10 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
@@ -1407,37 +1412,62 @@ pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
pmap_invalidate_page(pmap, va);
}
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t),
static)
{
if (force) {
sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
} else {
KASSERT((sva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: sva not page-aligned"));
KASSERT((eva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: eva not page-aligned"));
if ((cpu_feature & CPUID_SS) != 0)
return (pmap_invalidate_cache_range_selfsnoop);
if ((cpu_feature & CPUID_CLFSH) != 0)
return (pmap_force_invalidate_cache_range);
return (pmap_invalidate_cache_range_all);
}
#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
static void
pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva)
{
KASSERT((sva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: sva not page-aligned"));
KASSERT((eva & PAGE_MASK) == 0,
("pmap_invalidate_cache_range: eva not page-aligned"));
}
static void
pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva)
{
pmap_invalidate_cache_range_check_align(sva, eva);
}
void
pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{
sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
/*
* The supplied range is bigger than 2MB.
* Globally invalidate cache.
*/
pmap_invalidate_cache();
return;
}
if ((cpu_feature & CPUID_SS) != 0 && !force)
; /* If "Self Snoop" is supported and allowed, do nothing. */
else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 &&
eva - sva < PMAP_CLFLUSH_THRESHOLD) {
#ifdef DEV_APIC
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
* registers if we use CLFLUSH on the local APIC
* range. The local APIC is always uncached, so we
* don't need to flush for that range anyway.
*/
if (pmap_kextract(sva) == lapic_paddr)
return;
if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) {
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
* registers if we use CLFLUSH on the local APIC
* range. The local APIC is always uncached, so we
* don't need to flush for that range anyway.
*/
if (pmap_kextract(sva) == lapic_paddr)
return;
#endif
/*
* Otherwise, do per-cache line flush. Use the sfence
* Do per-cache line flush. Use the sfence
* instruction to insure that previous stores are
* included in the write-back. The processor
* propagates flush to other processors in the cache
@@ -1447,12 +1477,7 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
for (; sva < eva; sva += cpu_clflush_line_size)
clflushopt(sva);
sfence();
} else if ((cpu_feature & CPUID_CLFSH) != 0 &&
eva - sva < PMAP_CLFLUSH_THRESHOLD) {
#ifdef DEV_APIC
if (pmap_kextract(sva) == lapic_paddr)
return;
#endif
} else {
/*
* Writes are ordered by CLFLUSH on Intel CPUs.
*/
@@ -1462,17 +1487,17 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
clflush(sva);
if (cpu_vendor_id != CPU_VENDOR_INTEL)
mfence();
} else {
/*
* No targeted cache flush methods are supported by CPU,
* or the supplied range is bigger than 2MB.
* Globally invalidate cache.
*/
pmap_invalidate_cache();
}
}
static void
pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva)
{
pmap_invalidate_cache_range_check_align(sva, eva);
pmap_invalidate_cache();
}
void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
@@ -5479,7 +5504,7 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
pmap_invalidate_cache_range(va, va + size, FALSE);
pmap_invalidate_cache_range(va, va + size);
return ((void *)(va + offset));
}
@@ -5718,7 +5743,7 @@ pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
*/
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
pmap_invalidate_cache_range(base, tmpva, FALSE);
pmap_invalidate_cache_range(base, tmpva);
}
return (0);
}
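
The i386 file also gains the #include <x86/ifunc.h> that provides DEFINE_IFUNC. Underneath the macro is the compiler/linker ifunc mechanism: the kernel runs the resolver once while applying its ifunc relocations early at boot, and every subsequent call jumps straight to whichever implementation the resolver returned. An illustrative sketch of that mechanism (not the macro's exact expansion; the flush_* names are hypothetical):

static void flush_selfsnoop(vm_offset_t, vm_offset_t);
static void flush_clflush(vm_offset_t, vm_offset_t);
static void flush_wbinvd(vm_offset_t, vm_offset_t);

/* The resolver returns a pointer to the implementation to use. */
static void (*flush_range_resolver(void))(vm_offset_t, vm_offset_t)
{
	if ((cpu_feature & CPUID_SS) != 0)
		return (flush_selfsnoop);
	if ((cpu_feature & CPUID_CLFSH) != 0)
		return (flush_clflush);
	return (flush_wbinvd);
}

/* flush_range() becomes an indirect function bound via the resolver. */
void flush_range(vm_offset_t, vm_offset_t)
    __attribute__((ifunc("flush_range_resolver")));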


@@ -650,7 +650,7 @@ sf_buf_invalidate(struct sf_buf *sf)
* settings are recalculated.
*/
pmap_qenter(sf->kva, &m, 1);
pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE);
pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
}
/*


@@ -394,8 +394,8 @@ void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
void *pmap_trm_alloc(size_t size, int flags);
void pmap_trm_free(void *addr, size_t size);


@@ -368,8 +368,7 @@ dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
* If DMAR does not snoop paging structures accesses, flush
* CPU cache to memory.
*/
pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
TRUE);
pmap_force_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz);
}
void