Various fixes for TLB management on RISC-V.

- Remove the arm64-specific cpu_*cache* and cpu_tlb_flush* functions.
  Instead, add RISC-V specific inline functions in cpufunc.h for the
  fence.i and sfence.vma instructions.
- Catch up to changes in the arm64 pmap and remove all the cpu_dcache_*
  calls, pmap_is_current, pmap_l3_valid_cacheable, and PTE_NEXT bits from
  pmap.
- Remove references to the unimplemented riscv_setttb().
- Remove unused cpu_nullop.
- Add a link to the SBI doc to sbi.h.
- Add support for a 4th argument in SBI calls.  It's not documented, but
  it seems implied for the asid argument to SBI_REMOTE_SFENCE_VMA_ASID
  (see the calling-convention sketch after this list).
- Pass the arguments from sbi_remote_sfence*() to the SEE.  BBL ignores
  them so this is just cosmetic.
- Flush icaches on other CPUs when they resume from kdb in case the
  debugger wrote any breakpoints while the CPUs were paused in the IPI_STOP
  handler.
- Add SMP vs UP versions of pmap_invalidate_* similar to amd64.  The
  UP versions just use simple fences.  The SMP versions use the
  sbi_remote_sfence*() functions to perform TLB shootdowns.  Since we
  don't have a valid pm_active field in the riscv pmap, just IPI all
  CPUs for all invalidations for now.
- Remove an extraneous TLB flush from the end of pmap_bootstrap().
- Don't do a TLB flush when writing new mappings in pmap_enter(); only do
  one when modifying an existing mapping.  Note that for COW faults a TLB
  flush is only performed after explicitly clearing the old mapping, as is
  done in other pmaps.
- Sync the i-cache on all harts before updating the PTE for executable
  mappings in pmap_enter and pmap_enter_quick.  Previously the i-cache was
  only sync'd after updating the PTE in pmap_enter.
- Use sbi_remote_fence() instead of smp_rendezvous in pmap_sync_icache().
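
For reference, the legacy SBI calling convention assumed throughout: the
function number rides in a7, arguments in a0-a3, and the result comes back
in a0.  A hypothetical standalone C fragment (the a3 slot for the asid is
an inference, as noted above, not something the v0.1 spec documents):

	register uintptr_t a0 __asm ("a0") = (uintptr_t)hart_mask;
	register uintptr_t a1 __asm ("a1") = start;
	register uintptr_t a2 __asm ("a2") = size;
	register uintptr_t a3 __asm ("a3") = asid;	/* inferred 4th argument */
	register uintptr_t a7 __asm ("a7") = SBI_REMOTE_SFENCE_VMA_ASID;

	__asm __volatile("ecall" : "+r"(a0)
	    : "r"(a1), "r"(a2), "r"(a3), "r"(a7) : "memory");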

Reviewed by:	markj
Approved by:	re (gjb, kib)
Sponsored by:	DARPA
Differential Revision:	https://reviews.freebsd.org/D17414
John Baldwin	2018-10-15 18:56:54 +00:00
parent 36baf17e54, commit 73efa2fbd1
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=339367

8 changed files with 165 additions and 205 deletions

sys/cddl/dev/fbt/riscv/fbt_isa.c

@@ -78,11 +78,11 @@ fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val)
 	switch(fbt->fbtp_patchval) {
 	case FBT_C_PATCHVAL:
 		*(uint16_t *)fbt->fbtp_patchpoint = (uint16_t)val;
-		cpu_icache_sync_range((vm_offset_t)fbt->fbtp_patchpoint, 2);
+		fence_i();
 		break;
 	case FBT_PATCHVAL:
 		*fbt->fbtp_patchpoint = val;
-		cpu_icache_sync_range((vm_offset_t)fbt->fbtp_patchpoint, 4);
+		fence_i();
 		break;
 	};
 }
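
The stores above only need FENCE.I on the hart that performed them; a
minimal sketch of that local code-patching pattern (hypothetical helper,
not part of this change):

	static void
	patch_local(uint32_t *patchpoint, uint32_t insn)
	{
		*patchpoint = insn;	/* store the new instruction word */
		fence_i();		/* make this hart's fetches observe it */
	}

Remote harts are handled separately, via sbi_remote_fence_i() or, for the
debugger, via the IPI_STOP resume path later in this change.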

sys/riscv/include/cpufunc.h

@@ -81,29 +81,32 @@ intr_enable(void)
 	);
 }
 
-#define	cpu_nullop()			riscv_nullop()
+/* NB: fence() is defined as a macro in <machine/atomic.h>. */
+
+static __inline void
+fence_i(void)
+{
+
+	__asm __volatile("fence.i" ::: "memory");
+}
+
+static __inline void
+sfence_vma(void)
+{
+
+	__asm __volatile("sfence.vma" ::: "memory");
+}
+
+static __inline void
+sfence_vma_page(uintptr_t addr)
+{
+
+	__asm __volatile("sfence.vma %0" :: "r" (addr) : "memory");
+}
+
 #define	cpufunc_nullop()		riscv_nullop()
-#define	cpu_setttb(a)			riscv_setttb(a)
-#define	cpu_tlb_flushID()		riscv_tlb_flushID()
-#define	cpu_tlb_flushID_SE(e)		riscv_tlb_flushID_SE(e)
-
-#define	cpu_dcache_wbinv_range(a, s)	riscv_dcache_wbinv_range((a), (s))
-#define	cpu_dcache_inv_range(a, s)	riscv_dcache_inv_range((a), (s))
-#define	cpu_dcache_wb_range(a, s)	riscv_dcache_wb_range((a), (s))
-
-#define	cpu_idcache_wbinv_range(a, s)	riscv_idcache_wbinv_range((a), (s))
-#define	cpu_icache_sync_range(a, s)	riscv_icache_sync_range((a), (s))
 
 void riscv_nullop(void);
-void riscv_setttb(vm_offset_t);
-void riscv_tlb_flushID(void);
-void riscv_tlb_flushID_SE(vm_offset_t);
-void riscv_icache_sync_range(vm_offset_t, vm_size_t);
-void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_inv_range(vm_offset_t, vm_size_t);
-void riscv_dcache_wb_range(vm_offset_t, vm_size_t);
 
 #endif	/* _KERNEL */
 #endif	/* _MACHINE_CPUFUNC_H_ */
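
As a quick reference, each new primitive wraps exactly one instruction and
acts only on the executing hart (an illustrative summary, not code from
this change):

	fence_i();		/* fence.i: sync i-fetch with prior stores */
	sfence_vma();		/* sfence.vma: flush the entire local TLB */
	sfence_vma_page(va);	/* sfence.vma rs1=va: flush one page */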

sys/riscv/include/kdb.h

@@ -47,7 +47,11 @@ static __inline void
 kdb_cpu_sync_icache(unsigned char *addr, size_t size)
 {
 
-	cpu_icache_sync_range((vm_offset_t)addr, size);
+	/*
+	 * Other CPUs flush their instruction cache when resuming from
+	 * IPI_STOP.
+	 */
+	fence_i();
 }
 
 static __inline void

sys/riscv/include/sbi.h

@@ -47,18 +47,25 @@
 #define	SBI_REMOTE_SFENCE_VMA_ASID	7
 #define	SBI_SHUTDOWN			8
 
+/*
+ * Documentation available at
+ * https://github.com/riscv/riscv-sbi-doc/blob/master/riscv-sbi.md
+ */
+
 static __inline uint64_t
-sbi_call(uint64_t arg7, uint64_t arg0, uint64_t arg1, uint64_t arg2)
+sbi_call(uint64_t arg7, uint64_t arg0, uint64_t arg1, uint64_t arg2,
+    uint64_t arg3)
 {
 	register uintptr_t a0 __asm ("a0") = (uintptr_t)(arg0);
 	register uintptr_t a1 __asm ("a1") = (uintptr_t)(arg1);
 	register uintptr_t a2 __asm ("a2") = (uintptr_t)(arg2);
+	register uintptr_t a3 __asm ("a3") = (uintptr_t)(arg3);
 	register uintptr_t a7 __asm ("a7") = (uintptr_t)(arg7);
 
 	__asm __volatile(			\
 		"ecall"				\
 		:"+r"(a0)			\
-		:"r"(a1), "r"(a2), "r"(a7)	\
+		:"r"(a1), "r"(a2), "r" (a3), "r"(a7)	\
 		:"memory");
 
 	return (a0);
@@ -68,49 +75,49 @@ static __inline void
 sbi_console_putchar(int ch)
 {
 
-	sbi_call(SBI_CONSOLE_PUTCHAR, ch, 0, 0);
+	sbi_call(SBI_CONSOLE_PUTCHAR, ch, 0, 0, 0);
 }
 
 static __inline int
 sbi_console_getchar(void)
 {
 
-	return (sbi_call(SBI_CONSOLE_GETCHAR, 0, 0, 0));
+	return (sbi_call(SBI_CONSOLE_GETCHAR, 0, 0, 0, 0));
 }
 
 static __inline void
 sbi_set_timer(uint64_t val)
 {
 
-	sbi_call(SBI_SET_TIMER, val, 0, 0);
+	sbi_call(SBI_SET_TIMER, val, 0, 0, 0);
 }
 
 static __inline void
 sbi_shutdown(void)
 {
 
-	sbi_call(SBI_SHUTDOWN, 0, 0, 0);
+	sbi_call(SBI_SHUTDOWN, 0, 0, 0, 0);
 }
 
 static __inline void
 sbi_clear_ipi(void)
 {
 
-	sbi_call(SBI_CLEAR_IPI, 0, 0, 0);
+	sbi_call(SBI_CLEAR_IPI, 0, 0, 0, 0);
 }
 
 static __inline void
 sbi_send_ipi(const unsigned long *hart_mask)
 {
 
-	sbi_call(SBI_SEND_IPI, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_SEND_IPI, (uint64_t)hart_mask, 0, 0, 0);
 }
 
 static __inline void
 sbi_remote_fence_i(const unsigned long *hart_mask)
 {
 
-	sbi_call(SBI_REMOTE_FENCE_I, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_REMOTE_FENCE_I, (uint64_t)hart_mask, 0, 0, 0);
 }
 
 static __inline void
@@ -118,7 +125,7 @@ sbi_remote_sfence_vma(const unsigned long *hart_mask,
     unsigned long start, unsigned long size)
 {
 
-	sbi_call(SBI_REMOTE_SFENCE_VMA, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_REMOTE_SFENCE_VMA, (uint64_t)hart_mask, start, size, 0);
 }
 
 static __inline void
@@ -127,7 +134,8 @@ sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
     unsigned long asid)
 {
 
-	sbi_call(SBI_REMOTE_SFENCE_VMA_ASID, (uint64_t)hart_mask, 0, 0);
+	sbi_call(SBI_REMOTE_SFENCE_VMA_ASID, (uint64_t)hart_mask, start, size,
+	    asid);
 }
 
 #endif /* !_MACHINE_SBI_H_ */
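
A typical caller-side sequence, mirroring what pmap.c adopts later in this
change (a sketch assuming the kernel's all_cpus/PCPU machinery):

	cpuset_t mask;

	sched_pin();
	mask = all_cpus;
	CPU_CLR(PCPU_GET(cpuid), &mask);
	fence();			/* order prior PTE stores */
	sbi_remote_sfence_vma(mask.__bits, va, PAGE_SIZE);
	sfence_vma_page(va);		/* local hart */
	sched_unpin();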

sys/riscv/riscv/cpufunc_asm.S

@@ -33,70 +33,11 @@
  */
 
 #include <machine/asm.h>
-#include <machine/param.h>
+
 __FBSDID("$FreeBSD$");
 
 	.text
 	.align	2
 
-.Lpage_mask:
-	.word	PAGE_MASK
-
 ENTRY(riscv_nullop)
 	ret
 END(riscv_nullop)
-
-/*
- * Generic functions to read/modify/write the internal coprocessor registers
- */
-
-ENTRY(riscv_tlb_flushID)
-	sfence.vma
-	ret
-END(riscv_tlb_flushID)
-
-ENTRY(riscv_tlb_flushID_SE)
-	sfence.vma
-	ret
-END(riscv_tlb_flushID_SE)
-
-/*
- * void riscv_dcache_wb_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_wb_range)
-	sfence.vma
-	ret
-END(riscv_dcache_wb_range)
-
-/*
- * void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_wbinv_range)
-	sfence.vma
-	ret
-END(riscv_dcache_wbinv_range)
-
-/*
- * void riscv_dcache_inv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_dcache_inv_range)
-	sfence.vma
-	ret
-END(riscv_dcache_inv_range)
-
-/*
- * void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_idcache_wbinv_range)
-	fence.i
-	sfence.vma
-	ret
-END(riscv_idcache_wbinv_range)
-
-/*
- * void riscv_icache_sync_range(vm_offset_t, vm_size_t)
- */
-ENTRY(riscv_icache_sync_range)
-	fence.i
-	ret
-END(riscv_icache_sync_range)

sys/riscv/riscv/db_interface.c

@@ -151,11 +151,8 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data)
 		while (size-- > 0)
 			*dst++ = *data++;
 
 		fence();
 
-		/* Clean D-cache and invalidate I-cache */
-		cpu_dcache_wb_range(addr, (vm_size_t)size);
-		cpu_icache_sync_range(addr, (vm_size_t)size);
+		/* Invalidate I-cache */
+		fence_i();
 	}
 	(void)kdb_jmpbuf(prev_jb);

sys/riscv/riscv/mp_machdep.c

@@ -328,6 +328,12 @@ ipi_handler(void *arg)
 			CPU_CLR_ATOMIC(cpu, &started_cpus);
 			CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 			CTR0(KTR_SMP, "IPI_STOP (restart)");
+
+			/*
+			 * The kernel debugger might have set a breakpoint,
+			 * so flush the instruction cache.
+			 */
+			fence_i();
 			break;
 		case IPI_HARDCLOCK:
 			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
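
Together with kdb_cpu_sync_icache() above, this completes the debugger's
i-cache protocol (a sketch of the ordering, not code):

	debugger hart			stopped harts (in ipi_handler)
	-------------			------------------------------
	write breakpoint instructions	spin in IPI_STOP
	fence_i()  [kdb.h]		...
	release stopped_cpus	---->	observe restart
					fence_i()  [above]
					resume execution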

sys/riscv/riscv/pmap.c

@@ -152,6 +152,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/machdep.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
+#include <machine/sbi.h>
 
 #define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NUPDE			(NPDEPG * NPDEPG)
@@ -363,14 +364,6 @@ pmap_is_write(pt_entry_t entry)
 	return (entry & PTE_W);
 }
 
-static __inline int
-pmap_is_current(pmap_t pmap)
-{
-
-	return ((pmap == pmap_kernel()) ||
-	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
-}
-
 static __inline int
 pmap_l3_valid(pt_entry_t l3)
 {
@@ -378,17 +371,6 @@ pmap_l3_valid(pt_entry_t l3)
 	return (l3 & PTE_V);
 }
 
-static __inline int
-pmap_l3_valid_cacheable(pt_entry_t l3)
-{
-
-	/* TODO */
-	return (0);
-}
-
-#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
-
 static inline int
 pmap_page_accessed(pt_entry_t pte)
 {
@@ -514,14 +496,13 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
 	dmap_phys_max = pa;
 	dmap_max_addr = va;
 
-	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
-	cpu_tlb_flushID();
+	sfence_vma();
 }
 
 static vm_offset_t
 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 {
-	vm_offset_t l2pt, l3pt;
+	vm_offset_t l3pt;
 	pt_entry_t entry;
 	pd_entry_t *l2;
 	vm_paddr_t pa;
@@ -532,7 +513,6 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 
 	l2 = pmap_l2(kernel_pmap, va);
 	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
-	l2pt = (vm_offset_t)l2;
 	l2_slot = pmap_l2_index(va);
 	l3pt = l3_start;
@@ -550,9 +530,6 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 
 	/* Clean the L2 page table */
 	memset((void *)l3_start, 0, l3pt - l3_start);
-	cpu_dcache_wb_range(l3_start, l3pt - l3_start);
-
-	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
 
 	return (l3pt);
 }
@@ -676,7 +653,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 	freemempos = pmap_bootstrap_l3(l1pt,
 	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
 
-	cpu_tlb_flushID();
+	sfence_vma();
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
@@ -732,8 +709,6 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
 	 * called something like "Maxphyspage".
 	 */
 	Maxmem = atop(phys_avail[avail_slot - 1]);
-
-	cpu_tlb_flushID();
 }
 
 /*
@@ -769,42 +744,98 @@ pmap_init(void)
 		rw_init(&pv_list_locks[i], "pmap pv list");
 }
 
+#ifdef SMP
+/*
+ * For SMP, these functions have to use IPIs for coherence.
+ *
+ * In general, the calling thread uses a plain fence to order the
+ * writes to the page tables before invoking an SBI callback to invoke
+ * sfence_vma() on remote CPUs.
+ *
+ * Since the riscv pmap does not yet have a pm_active field, IPIs are
+ * sent to all CPUs in the system.
+ */
+static void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+	cpuset_t mask;
+
+	sched_pin();
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+	sbi_remote_sfence_vma(mask.__bits, va, 1);
+	sfence_vma_page(va);
+	sched_unpin();
+}
+
+static void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	cpuset_t mask;
+
+	sched_pin();
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+	sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
+
+	/*
+	 * Might consider a loop of sfence_vma_page() for a small
+	 * number of pages in the future.
+	 */
+	sfence_vma();
+	sched_unpin();
+}
+
+static void
+pmap_invalidate_all(pmap_t pmap)
+{
+	cpuset_t mask;
+
+	sched_pin();
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+
+	/*
+	 * XXX: The SBI doc doesn't detail how to specify x0 as the
+	 * address to perform a global fence.  BBL currently treats
+	 * all sfence_vma requests as global however.
+	 */
+	sbi_remote_sfence_vma(mask.__bits, 0, 0);
+	sched_unpin();
+}
+#else
 /*
  * Normal, non-SMP, invalidation functions.
  * We inline these within pmap.c for speed.
  */
-PMAP_INLINE void
+static __inline void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
-	/* TODO */
-
-	sched_pin();
-	__asm __volatile("sfence.vma %0" :: "r" (va) : "memory");
-	sched_unpin();
+	sfence_vma_page(va);
 }
 
-PMAP_INLINE void
+static __inline void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 
-	/* TODO */
-
-	sched_pin();
-	__asm __volatile("sfence.vma");
-	sched_unpin();
+	/*
+	 * Might consider a loop of sfence_vma_page() for a small
+	 * number of pages in the future.
+	 */
+	sfence_vma();
 }
 
-PMAP_INLINE void
+static __inline void
 pmap_invalidate_all(pmap_t pmap)
 {
 
-	/* TODO */
-
-	sched_pin();
-	__asm __volatile("sfence.vma");
-	sched_unpin();
+	sfence_vma();
 }
+#endif
 
 /*
  * Routine:	pmap_extract
@@ -937,8 +968,6 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 		entry |= (pn << PTE_PPN0_S);
 
 		pmap_load_store(l3, entry);
-		PTE_SYNC(l3);
-
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
@@ -958,11 +987,9 @@ pmap_kremove(vm_offset_t va)
 
 	l3 = pmap_l3(kernel_pmap, va);
 	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
-	if (pmap_l3_valid_cacheable(pmap_load(l3)))
-		cpu_dcache_wb_range(va, L3_SIZE);
 	pmap_load_clear(l3);
-	PTE_SYNC(l3);
-	pmap_invalidate_page(kernel_pmap, va);
+	sfence_vma();
 }
 
 void
@@ -981,11 +1008,11 @@ pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
 		pmap_load_clear(l3);
-		PTE_SYNC(l3);
 
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
+
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
@@ -1039,7 +1066,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l3, entry);
-		PTE_SYNC(l3);
 
 		va += L3_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
@@ -1063,10 +1089,7 @@ pmap_qremove(vm_offset_t sva, int count)
 
 		l3 = pmap_l3(kernel_pmap, va);
 		KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
 
-		if (pmap_l3_valid_cacheable(pmap_load(l3)))
-			cpu_dcache_wb_range(va, L3_SIZE);
 		pmap_load_clear(l3);
-		PTE_SYNC(l3);
 
 		va += PAGE_SIZE;
 	}
@@ -1127,13 +1150,11 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 		l1 = pmap_l1(pmap, va);
 		pmap_load_clear(l1);
 		pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
-		PTE_SYNC(l1);
 	} else {
 		/* PTE page */
 		pd_entry_t *l2;
 
 		l2 = pmap_l2(pmap, va);
 		pmap_load_clear(l2);
-		PTE_SYNC(l2);
 	}
 	pmap_resident_count_dec(pmap, 1);
 	if (m->pindex < NUPDE) {
@@ -1279,9 +1300,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l1, entry);
 		pmap_distribute_l1(pmap, l1index, entry);
-		PTE_SYNC(l1);
-
 	} else {
 		vm_pindex_t l1index;
 		pd_entry_t *l1, *l2;
@@ -1310,8 +1328,6 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 		entry = (PTE_V);
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l2, entry);
-		PTE_SYNC(l2);
 	}
 
 	pmap_resident_count_inc(pmap, 1);
@@ -1445,8 +1461,6 @@ pmap_growkernel(vm_offset_t addr)
 			pmap_load_store(l1, entry);
 			pmap_distribute_l1(kernel_pmap,
 			    pmap_l1_index(kernel_vm_end), entry);
-			PTE_SYNC(l1);
-
 			continue; /* try again */
 		}
 		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
@@ -1474,7 +1488,6 @@ pmap_growkernel(vm_offset_t addr)
 		entry |= (pn << PTE_PPN0_S);
 		pmap_load_store(l2, entry);
-		PTE_SYNC(l2);
 
 		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
 		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
@@ -1754,10 +1767,7 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
-	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
-		cpu_dcache_wb_range(va, L3_SIZE);
 	old_l3 = pmap_load_clear(l3);
-	PTE_SYNC(l3);
 	pmap_invalidate_page(pmap, va);
 	if (old_l3 & PTE_SW_WIRED)
 		pmap->pm_stats.wired_count -= 1;
@@ -1913,11 +1923,7 @@ pmap_remove_all(vm_page_t m)
 		    "a block in %p's pv list", m));
 
 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
-		if (pmap_is_current(pmap) &&
-		    pmap_l3_valid_cacheable(pmap_load(l3)))
-			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
 		tl3 = pmap_load_clear(l3);
-		PTE_SYNC(l3);
 		pmap_invalidate_page(pmap, pv->pv_va);
 		if (tl3 & PTE_SW_WIRED)
 			pmap->pm_stats.wired_count--;
@@ -1947,7 +1953,7 @@ pmap_remove_all(vm_page_t m)
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
-	vm_offset_t va, va_next;
+	vm_offset_t va_next;
 	pd_entry_t *l1, *l2;
 	pt_entry_t *l3p, l3;
 	pt_entry_t entry;
@@ -1986,7 +1992,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 		if (va_next > eva)
 			va_next = eva;
 
-		va = va_next;
 		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
 		    sva += L3_SIZE) {
 			l3 = pmap_load(l3p);
@@ -1994,7 +1999,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 				entry = pmap_load(l3p);
 				entry &= ~(PTE_W);
 				pmap_load_store(l3p, entry);
-				PTE_SYNC(l3p);
 				/* XXX: Use pmap_invalidate_range */
 				pmap_invalidate_page(pmap, sva);
 			}
@@ -2092,8 +2096,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 			entry |= (l2_pn << PTE_PPN0_S);
 			pmap_load_store(l1, entry);
 			pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
-			PTE_SYNC(l1);
-
 			l2 = pmap_l1_to_l2(l1, va);
 		}
@@ -2112,7 +2114,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 			entry = (PTE_V);
 			entry |= (l3_pn << PTE_PPN0_S);
 			pmap_load_store(l2, entry);
-			PTE_SYNC(l2);
 			l3 = pmap_l2_to_l3(l2, va);
 		}
 		pmap_invalidate_page(pmap, va);
@@ -2163,10 +2164,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 			goto validate;
 		}
 
-		/* Flush the cache, there might be uncommitted data in it */
-		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
-			cpu_dcache_wb_range(va, L3_SIZE);
-
 		/*
 		 * The physical page has changed.  Temporarily invalidate
 		 * the mapping.  This ensures that all threads sharing the
@@ -2225,13 +2222,20 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
 
-validate:
+	/*
+	 * Sync the i-cache on all harts before updating the PTE
+	 * if the new PTE is executable.
+	 */
+	if (prot & VM_PROT_EXECUTE)
+		pmap_sync_icache(pmap, va, PAGE_SIZE);
+
 	/*
 	 * Update the L3 entry.
 	 */
 	if (orig_l3 != 0) {
+validate:
 		orig_l3 = pmap_load_store(l3, new_l3);
-		PTE_SYNC(l3);
+		pmap_invalidate_page(pmap, va);
 		KASSERT(PTE_TO_PHYS(orig_l3) == pa,
 		    ("pmap_enter: invalid update"));
 		if (pmap_page_dirty(orig_l3) &&
@@ -2239,11 +2243,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 			vm_page_dirty(m);
 	} else {
 		pmap_load_store(l3, new_l3);
-		PTE_SYNC(l3);
 	}
-	pmap_invalidate_page(pmap, va);
-
-	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
-		cpu_icache_sync_range(va, PAGE_SIZE);
 
 	if (lock != NULL)
 		rw_wunlock(lock);
@@ -2423,9 +2423,16 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		entry |= PTE_SW_MANAGED;
 
+	/*
+	 * Sync the i-cache on all harts before updating the PTE
+	 * if the new PTE is executable.
+	 */
+	if (prot & VM_PROT_EXECUTE)
+		pmap_sync_icache(pmap, va, PAGE_SIZE);
+
 	pmap_load_store(l3, entry);
-	PTE_SYNC(l3);
 	pmap_invalidate_page(pmap, va);
 	return (mpte);
 }
@@ -2766,11 +2773,7 @@ pmap_remove_pages(pmap_t pmap)
 				    ("pmap_remove_pages: bad l3 %#jx",
 				    (uintmax_t)tl3));
 
-				if (pmap_is_current(pmap) &&
-				    pmap_l3_valid_cacheable(pmap_load(l3)))
-					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
 				pmap_load_clear(l3);
-				PTE_SYNC(l3);
 				pmap_invalidate_page(pmap, pv->pv_va);
 
 				/*
@@ -3244,16 +3247,10 @@ pmap_activate(struct thread *td)
 	critical_exit();
 }
 
-static void
-pmap_sync_icache_one(void *arg __unused)
-{
-
-	__asm __volatile("fence.i");
-}
-
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
+	cpuset_t mask;
 
 	/*
 	 * From the RISC-V User-Level ISA V2.2:
@@ -3263,8 +3260,12 @@ pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 	 * before requesting that all remote RISC-V harts execute a
 	 * FENCE.I."
 	 */
-	__asm __volatile("fence");
-	smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
+	sched_pin();
+	mask = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &mask);
+	fence();
+	sbi_remote_fence_i(mask.__bits);
+	sched_unpin();
 }
 
 /*
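
A possible follow-up suggested by the pm_active comment above (a
hypothetical sketch, not part of this commit): once the riscv pmap
maintains pm_active the way amd64 does, the shootdown mask could narrow
from all_cpus to the harts actually running the pmap:

	mask = pmap->pm_active;		/* assumes a maintained pm_active */
	CPU_CLR(PCPU_GET(cpuid), &mask);
	fence();
	sbi_remote_sfence_vma(mask.__bits, va, 1);
	sfence_vma_page(va);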