Acquiring smp_ipi_mtx on every call to pmap_invalidate_*() is wasteful.

For example, during a buildworld more than half of the calls do not
generate an IPI because the only TLB entry invalidated is on the calling
processor.  This revision pushes down the acquisition and release of
smp_ipi_mtx into smp_tlb_shootdown() and smp_targeted_tlb_shootdown(),
and instead uses sched_pin() and sched_unpin() in pmap_invalidate_*()
so that thread migration doesn't lead to a missed TLB invalidation.

Reviewed by: jhb
MFC after: 3 weeks
Alan Cox  2007-03-05 21:40:10 +00:00
commit 8da3fc95a7
parent 4d44d81742
5 changed files with 33 additions and 134 deletions
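
For orientation before the diff: the sketch below paraphrases the patched
amd64 pmap_invalidate_page() in abridged form. It is an illustration, not
the exact committed source; the middle of the else branch is reconstructed
from the surrounding context lines. The idea is that sched_pin() only keeps
the thread from migrating off its current CPU, while smp_ipi_mtx is now
acquired inside the shootdown helpers, on the paths that actually raise an
IPI.

void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
        u_int cpumask;
        u_int other_cpus;

        sched_pin();            /* no migration: pm_active/PCPU checks stay valid */
        if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
                invlpg(va);     /* invalidate the local TLB entry */
                smp_invlpg(va); /* IPI all other CPUs; takes smp_ipi_mtx internally */
        } else {
                cpumask = PCPU_GET(cpumask);
                other_cpus = PCPU_GET(other_cpus);
                if (pmap->pm_active & cpumask)
                        invlpg(va);
                if (pmap->pm_active & other_cpus)
                        /* Only here does an IPI (and the lock) come into play. */
                        smp_masked_invlpg(pmap->pm_active & other_cpus, va);
        }
        sched_unpin();
}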


@@ -835,13 +835,16 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 	ncpu = mp_ncpus - 1;	/* does not shootdown self */
 	if (ncpu < 1)
 		return;		/* no other cpus */
-	mtx_assert(&smp_ipi_mtx, MA_OWNED);
+	if (!(read_rflags() & PSL_I))
+		panic("%s: interrupts disabled", __func__);
+	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	ipi_all_but_self(vector);
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
+	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 static void
@@ -869,7 +872,9 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
 		if (ncpu < 1)
 			return;
 	}
-	mtx_assert(&smp_ipi_mtx, MA_OWNED);
+	if (!(read_rflags() & PSL_I))
+		panic("%s: interrupts disabled", __func__);
+	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
@@ -879,6 +884,7 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
 		ipi_selected(mask, vector);
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
+	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 void


@@ -752,18 +752,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	u_int cpumask;
 	u_int other_cpus;
 
-	if (smp_started) {
-		if (!(read_rflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		invlpg(va);
 		smp_invlpg(va);
@@ -775,10 +764,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
 	}
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 
 void
@@ -788,18 +774,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	u_int other_cpus;
 	vm_offset_t addr;
 
-	if (smp_started) {
-		if (!(read_rflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
@@ -814,10 +789,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
 			    sva, eva);
 	}
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 
 void
@@ -826,18 +798,7 @@ pmap_invalidate_all(pmap_t pmap)
 	u_int cpumask;
 	u_int other_cpus;
 
-	if (smp_started) {
-		if (!(read_rflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		invltlb();
 		smp_invltlb();
@@ -849,34 +810,17 @@ pmap_invalidate_all(pmap_t pmap)
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invltlb(pmap->pm_active & other_cpus);
 	}
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 
 void
 pmap_invalidate_cache(void)
 {
 
-	if (smp_started) {
-		if (!(read_rflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	wbinvd();
 	smp_cache_flush();
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 #else /* !SMP */
 /*


@@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/md_var.h>
 #include <machine/mp_watchdog.h>
 #include <machine/pcb.h>
+#include <machine/psl.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <machine/privatespace.h>
@@ -1007,13 +1008,16 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 	ncpu = mp_ncpus - 1;	/* does not shootdown self */
 	if (ncpu < 1)
 		return;		/* no other cpus */
-	mtx_assert(&smp_ipi_mtx, MA_OWNED);
+	if (!(read_eflags() & PSL_I))
+		panic("%s: interrupts disabled", __func__);
+	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
 	ipi_all_but_self(vector);
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
+	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 static void
@@ -1041,7 +1045,9 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
 		if (ncpu < 1)
 			return;
 	}
-	mtx_assert(&smp_ipi_mtx, MA_OWNED);
+	if (!(read_eflags() & PSL_I))
+		panic("%s: interrupts disabled", __func__);
+	mtx_lock_spin(&smp_ipi_mtx);
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
@@ -1051,6 +1057,7 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
 		ipi_selected(mask, vector);
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
+	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
 void


@@ -717,18 +717,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	u_int cpumask;
 	u_int other_cpus;
 
-	if (smp_started) {
-		if (!(read_eflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		invlpg(va);
 		smp_invlpg(va);
@@ -740,10 +729,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
 	}
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 
 void
@@ -753,18 +739,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	u_int other_cpus;
 	vm_offset_t addr;
 
-	if (smp_started) {
-		if (!(read_eflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
@@ -779,10 +754,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
 			    sva, eva);
 	}
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 
 void
@@ -791,18 +763,7 @@ pmap_invalidate_all(pmap_t pmap)
 	u_int cpumask;
 	u_int other_cpus;
 
-	if (smp_started) {
-		if (!(read_eflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
 		invltlb();
 		smp_invltlb();
@@ -814,34 +775,17 @@ pmap_invalidate_all(pmap_t pmap)
 		if (pmap->pm_active & other_cpus)
 			smp_masked_invltlb(pmap->pm_active & other_cpus);
 	}
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 
 void
 pmap_invalidate_cache(void)
 {
 
-	if (smp_started) {
-		if (!(read_eflags() & PSL_I))
-			panic("%s: interrupts disabled", __func__);
-		mtx_lock_spin(&smp_ipi_mtx);
-	} else
-		critical_enter();
-	/*
-	 * We need to disable interrupt preemption but MUST NOT have
-	 * interrupts disabled here.
-	 * XXX we may need to hold schedlock to get a coherent pm_active
-	 * XXX critical sections disable interrupts again
-	 */
+	sched_pin();
 	wbinvd();
 	smp_cache_flush();
-	if (smp_started)
-		mtx_unlock_spin(&smp_ipi_mtx);
-	else
-		critical_exit();
+	sched_unpin();
 }
 #else /* !SMP */
 /*


@@ -767,9 +767,7 @@ sf_buf_alloc(struct vm_page *m, int flags)
 		other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask;
 		if (other_cpus != 0) {
 			sf->cpumask |= other_cpus;
-			mtx_lock_spin(&smp_ipi_mtx);
 			smp_masked_invlpg(other_cpus, sf->kva);
-			mtx_unlock_spin(&smp_ipi_mtx);
 		}
 	}
 	sched_unpin();