From 62ec4add59e6f0a221309e91146a17ad857adc2f Mon Sep 17 00:00:00 2001
From: Thomas Moestl <tmm@FreeBSD.org>
Date: Wed, 2 Jan 2002 18:49:20 +0000
Subject: [PATCH] 1. Implement an optimization for pmap_remove() and
 pmap_protect(): if a substantial fraction of the tte entries in the tsb
 would need to be looked up, traverse the tsb instead. This is crucial in
 some places, e.g. when swapping out a process, where a certain
 pmap_remove() call would otherwise take a very long time to complete.
 2. Implement pmap_qenter_flags(), which will be used later.
 3. Reactivate the instruction cache flush done when mapping as executable.
 This is required e.g. when executing files via NFS, but is known to cause
 problems on UltraSPARC-IIe CPUs. If you have such a CPU, you will need to
 comment this call out for now.

Submitted by:	jake (3)
---
 sys/sparc64/include/pmap.h |   1 +
 sys/sparc64/include/tsb.h  |   9 ++-
 sys/sparc64/sparc64/pmap.c | 153 +++++++++++++++++++++++++------------
 sys/sparc64/sparc64/tsb.c  |  28 +++++++
 4 files changed, 143 insertions(+), 48 deletions(-)
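
A rough sketch of the arithmetic behind PMAP_TSB_THRESH, for reference; it
assumes 8K pages and a 16-byte struct tte (two u_longs), so the concrete
numbers are illustrative and not part of the patch:

	/*
	 * TSB_SIZE        = TSB_BSIZE / sizeof(struct tte) = 8192 / 16 = 512
	 * PMAP_TSB_THRESH = (TSB_SIZE / 2) * PAGE_SIZE = 256 * 8192 = 2MB
	 *
	 * A range larger than 2MB covers more pages than half the tsb can
	 * hold, so a single pass over the 512 tsb entries is cheaper than
	 * one tsb_tte_lookup() per page in the range.
	 */
	if (end - start > PMAP_TSB_THRESH)
		tsb_foreach(pm, start, end, pmap_remove_tte);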

diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h
index 7ab24097b350..71496c34cf6d 100644
--- a/sys/sparc64/include/pmap.h
+++ b/sys/sparc64/include/pmap.h
@@ -87,6 +87,7 @@ struct pv_entry {
 void	pmap_bootstrap(vm_offset_t ekva);
 vm_offset_t pmap_kextract(vm_offset_t va);
 void	pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags);
+void	pmap_qenter_flags(vm_offset_t va, vm_page_t *m, int count, u_long fl);
 
 int	pmap_cache_enter(vm_page_t m, vm_offset_t va);
 void	pmap_cache_remove(vm_page_t m, vm_offset_t va);
diff --git a/sys/sparc64/include/tsb.h b/sys/sparc64/include/tsb.h
index 9391e6077fd5..b99621717a1f 100644
--- a/sys/sparc64/include/tsb.h
+++ b/sys/sparc64/include/tsb.h
@@ -32,10 +32,13 @@
 #ifndef	_MACHINE_TSB_H_
 #define	_MACHINE_TSB_H_
 
+#define	TSB_BSHIFT			PAGE_SHIFT_8K
+#define	TSB_BSIZE			(1UL << TSB_BSHIFT)
+#define	TSB_SIZE			(TSB_BSIZE / sizeof(struct tte))
 #define	TSB_BUCKET_SHIFT		(2)
 #define	TSB_BUCKET_SIZE			(1 << TSB_BUCKET_SHIFT)
 #define	TSB_BUCKET_ADDRESS_BITS \
-	(PAGE_SHIFT_8K - TSB_BUCKET_SHIFT - TTE_SHIFT)
+	(TSB_BSHIFT - TSB_BUCKET_SHIFT - TTE_SHIFT)
 #define	TSB_BUCKET_MASK			((1 << TSB_BUCKET_ADDRESS_BITS) - 1)
 
 #define	TSB_KERNEL_SIZE \
@@ -70,10 +73,14 @@ tsb_kvtotte(vm_offset_t va)
 	return (tsb_kvpntotte(va >> PAGE_SHIFT));
 }
 
+typedef int (tsb_callback_t)(struct pmap *, struct tte *, vm_offset_t);
+
 struct	tte *tsb_tte_lookup(pmap_t pm, vm_offset_t va);
 void	tsb_tte_remove(struct tte *stp);
 struct	tte *tsb_tte_enter(pmap_t pm, vm_page_t m, vm_offset_t va,
 			   struct tte tte);
 void	tsb_tte_local_remove(struct tte *tp);
+void	tsb_foreach(pmap_t pm, vm_offset_t start, vm_offset_t end,
+		    tsb_callback_t *callback);
 
 #endif /* !_MACHINE_TSB_H_ */
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index a71b05ec0827..2e6e1892314b 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -190,6 +190,24 @@ static void pmap_context_destroy(u_int i);
  */
 static vm_offset_t pmap_bootstrap_alloc(vm_size_t size);
 
+/*
+ * If a user pmap is processed with pmap_remove and the resident count
+ * drops to 0, there are no more pages to remove, so we need not
+ * continue.
+ */
+#define	PMAP_REMOVE_DONE(pm) \
+	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
+
+/*
+ * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
+ * and pmap_protect() instead of looking up each virtual address individually.
+ */
+#define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
+
+/* Callbacks for tsb_foreach. */
+tsb_callback_t pmap_remove_tte;
+tsb_callback_t pmap_protect_tte;
+
 /*
  * Quick sort callout for comparing memory regions.
  */
@@ -563,6 +581,8 @@ pmap_kenter(vm_offset_t va, vm_offset_t pa)
  * Map a wired page into kernel virtual address space. This additionally
 * takes a flag argument which is or'ed to the TTE data. This is used by
  * bus_space_map().
+ * NOTE: if the mapping is non-cacheable, it's the caller's responsibility
+ * to flush entries that might still be in the cache, if applicable.
  */
 void
 pmap_kenter_flags(vm_offset_t va, vm_offset_t pa, u_long flags)
@@ -646,6 +666,19 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
 		pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
 }
 
+/*
+ * As above, but take an additional flags argument and call
+ * pmap_kenter_flags().
+ */
+void
+pmap_qenter_flags(vm_offset_t va, vm_page_t *m, int count, u_long fl)
+{
+	int i;
+
+	for (i = 0; i < count; i++, va += PAGE_SIZE)
+		pmap_kenter_flags(va, VM_PAGE_TO_PHYS(m[i]), fl);
+}
+
 /*
  * Remove page mappings from kernel virtual address space.  Intended for
  * temporary mappings entered by pmap_qenter.
@@ -1096,6 +1129,31 @@ pmap_collect(void)
 	pmap_pagedaemon_waken = 0;
 }
 
+int
+pmap_remove_tte(struct pmap *pm, struct tte *tp, vm_offset_t va)
+{
+	vm_page_t m;
+
+	m = PHYS_TO_VM_PAGE(TD_GET_PA(tp->tte_data));
+	if ((tp->tte_data & TD_PV) != 0) {
+		if ((tp->tte_data & TD_W) != 0 &&
+		    pmap_track_modified(pm, va))
+			vm_page_dirty(m);
+		if ((tp->tte_data & TD_REF) != 0)
+			vm_page_flag_set(m, PG_REFERENCED);
+		pv_remove(pm, m, va);
+		pmap_cache_remove(m, va);
+	}
+	atomic_clear_long(&tp->tte_data, TD_V);
+	tp->tte_tag = 0;
+	tp->tte_data = 0;
+	tlb_page_demap(TLB_ITLB | TLB_DTLB,
+	    pm->pm_context, va);
+	if (PMAP_REMOVE_DONE(pm))
+		return (0);
+	return (1);
+}
+
 /*
  * Remove the given range of addresses from the specified map.
  */
@@ -1103,31 +1161,56 @@ void
 pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
 {
 	struct tte *tp;
-	vm_page_t m;
 
 	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
 	    pm->pm_context, start, end);
-	for (; start < end; start += PAGE_SIZE) {
-		if ((tp = tsb_tte_lookup(pm, start)) != NULL) {
-			m = PHYS_TO_VM_PAGE(TD_GET_PA(tp->tte_data));
-			if ((tp->tte_data & TD_PV) != 0) {
-				if ((tp->tte_data & TD_W) != 0 &&
-				    pmap_track_modified(pm, start))
-					vm_page_dirty(m);
-				if ((tp->tte_data & TD_REF) != 0)
-					vm_page_flag_set(m, PG_REFERENCED);
-				pv_remove(pm, m, start);
-				pmap_cache_remove(m, start);
+	if (PMAP_REMOVE_DONE(pm))
+		return;
+	if (end - start > PMAP_TSB_THRESH)
+		tsb_foreach(pm, start, end, pmap_remove_tte);
+	else {
+		for (; start < end; start += PAGE_SIZE) {
+			if ((tp = tsb_tte_lookup(pm, start)) != NULL) {
+				if (!pmap_remove_tte(pm, tp, start))
+					break;
 			}
-			atomic_clear_long(&tp->tte_data, TD_V);
-			tp->tte_tag = 0;
-			tp->tte_data = 0;
-			tlb_page_demap(TLB_ITLB | TLB_DTLB,
-			    pm->pm_context, start);
 		}
 	}
 }
 
+int
+pmap_protect_tte(struct pmap *pm, struct tte *tp, vm_offset_t va)
+{
+	vm_page_t m;
+	u_long data;
+
+	data = tp->tte_data;
+	if ((data & TD_PV) != 0) {
+		m = PHYS_TO_VM_PAGE(TD_GET_PA(data));
+		if ((data & TD_REF) != 0) {
+			vm_page_flag_set(m, PG_REFERENCED);
+			data &= ~TD_REF;
+		}
+		if ((data & TD_W) != 0 &&
+		    pmap_track_modified(pm, va)) {
+			vm_page_dirty(m);
+		}
+	}
+
+	data &= ~(TD_W | TD_SW);
+
+	CTR2(KTR_PMAP, "pmap_protect: new=%#lx old=%#lx",
+	    data, tp->tte_data);
+
+	if (data != tp->tte_data) {
+		CTR0(KTR_PMAP, "pmap_protect: demap");
+		tlb_page_demap(TLB_DTLB | TLB_ITLB,
+		    pm->pm_context, va);
+		tp->tte_data = data;
+	}
+	return (0);
+}
+
 /*
  * Set the physical protection on the specified range of this map as requested.
  */
@@ -1135,8 +1218,6 @@ void
 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	struct tte *tp;
-	vm_page_t m;
-	u_long data;
 
 	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
 	    pm->pm_context, sva, eva, prot);
@@ -1152,32 +1233,12 @@ pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 	if (prot & VM_PROT_WRITE)
 		return;
 
-	for (; sva < eva; sva += PAGE_SIZE) {
-		if ((tp = tsb_tte_lookup(pm, sva)) != NULL) {
-			data = tp->tte_data;
-			if ((data & TD_PV) != 0) {
-				m = PHYS_TO_VM_PAGE(TD_GET_PA(data));
-				if ((data & TD_REF) != 0) {
-					vm_page_flag_set(m, PG_REFERENCED);
-					data &= ~TD_REF;
-				}
-				if ((data & TD_W) != 0 &&
-				    pmap_track_modified(pm, sva)) {
-					vm_page_dirty(m);
-				}
-			}
-	
-			data &= ~(TD_W | TD_SW);
-
-			CTR2(KTR_PMAP, "pmap_protect: new=%#lx old=%#lx",
-			    data, tp->tte_data);
-	
-			if (data != tp->tte_data) {
-				CTR0(KTR_PMAP, "pmap_protect: demap");
-				tlb_page_demap(TLB_DTLB | TLB_ITLB,
-				    pm->pm_context, sva);
-				tp->tte_data = data;
-			}
+	if (eva - sva > PMAP_TSB_THRESH)
+		tsb_foreach(pm, sva, eva, pmap_protect_tte);
+	else {
+		for (; sva < eva; sva += PAGE_SIZE) {
+			if ((tp = tsb_tte_lookup(pm, sva)) != NULL)
+				pmap_protect_tte(pm, tp, sva);
 		}
 	}
 }
@@ -1317,9 +1378,7 @@ pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 		tte.tte_data |= TD_SW;
 	if (prot & VM_PROT_EXECUTE) {
 		tte.tte_data |= TD_EXEC;
-#if 0
 		icache_inval_phys(pa, pa + PAGE_SIZE - 1);
-#endif
 	}
 
 	if (tp != NULL)
diff --git a/sys/sparc64/sparc64/tsb.c b/sys/sparc64/sparc64/tsb.c
index b975baf460d2..50ed4693c6a9 100644
--- a/sys/sparc64/sparc64/tsb.c
+++ b/sys/sparc64/sparc64/tsb.c
@@ -163,3 +163,31 @@ tsb_tte_enter(pmap_t pm, vm_page_t m, vm_offset_t va, struct tte tte)
 	CTR1(KTR_TSB, "tsb_tte_enter: return tp=%p", tp);
 	return (tp);
 }
+
+/*
+ * Traverse the tsb of a pmap, calling the callback function for any tte
+ * entry that has a virtual address between start and end. If the callback
+ * returns 0, tsb_foreach() terminates.
+ * This is used as an optimization by pmap_remove() and pmap_protect() when
+ * the number of pages in the given range approaches the number of entries
+ * in the tsb.
+ */
+void
+tsb_foreach(pmap_t pm, vm_offset_t start, vm_offset_t end,
+    tsb_callback_t *callback)
+{
+	vm_offset_t va;
+	struct tte *tp;
+	int i;
+
+	for (i = 0; i < TSB_SIZE; i++) {
+		tp = &pm->pm_tsb[i];
+		if ((tp->tte_data & TD_V) != 0) {
+			va = tte_get_va(*tp);
+			if (va >= start && va < end) {
+				if (!callback(pm, tp, va))
+					break;
+			}
+		}
+	}
+}
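
As a usage note on the callback protocol above: a callback returns nonzero to
continue the traversal and 0 to stop early (as pmap_remove_tte() does once
PMAP_REMOVE_DONE() is true). A minimal, hypothetical callback that merely
counts the valid mappings in a range might look like this (not part of the
patch):

	static int tte_count;

	static int
	count_tte(struct pmap *pm, struct tte *tp, vm_offset_t va)
	{
		tte_count++;
		return (1);	/* nonzero: keep traversing the tsb */
	}

	...
	tte_count = 0;
	tsb_foreach(pm, start, end, count_tte);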