Allow the VM object flushing code to cluster.

When the filesystem syncer comes along and flushes a file which has been mmap()'d SHARED/RW, with dirty pages, it was flushing the underlying VM object asynchronously, resulting in thousands of 8K writes. With this change the VM object flushing code will cluster dirty pages in 64K blocks.

Note that until the low memory deadlock issue is reviewed, it is not safe to allow the pageout daemon to use this feature. Forced pageouts still use fs block size'd ops for the moment.

MFC after: 3 days
2002-12-28 21:03:42 +00:00 · 2002-12-28 21:03:42 +00:00 · 43b7990e30
commit 43b7990e30
parent 3a3d82ec0a
3 changed files with 22 additions and 7 deletions
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@ -631,6 +631,9 @@ vm_object_terminate(vm_object_t object)
 *	write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
 *	leaving the object dirty.
 *
+ *	When stuffing pages asynchronously, allow clustering.  XXX we need a
+ *	synchronous clustering mode implementation.
+ *
 *	Odd semantics: if start == end, we clean everything.
 *
 *	The object must be locked.
@ -652,7 +655,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
 		(object->flags & OBJ_MIGHTBEDIRTY) == 0)
 		return;

-	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : 0;
+	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
 	pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;

 	vp = object->handle;
@ -682,6 +685,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
 		scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
 		if (scanreset < 16)
 			scanreset = 16;
+		pagerflags |= VM_PAGER_IGNORE_CLEANCHK;

 		scanlimit = scanreset;
 		tscan = tstart;
@ -732,6 +736,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
 			vm_object_clear_flag(object, OBJ_CLEANING);
 			return;
 		}
+		pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
 	}

 	/*
@ -854,7 +859,8 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,

 		if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
 			if ((tp->flags & PG_BUSY) ||
-				(tp->flags & PG_CLEANCHK) == 0 ||
+				((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
+				 (tp->flags & PG_CLEANCHK) == 0) ||
 				(tp->busy != 0))
 				break;
 			if((tp->queue - tp->pc) == PQ_CACHE) {
@ -881,7 +887,8 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,

 			if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
 				if ((tp->flags & PG_BUSY) ||
-					(tp->flags & PG_CLEANCHK) == 0 ||
+					((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
+					 (tp->flags & PG_CLEANCHK) == 0) ||
 					(tp->busy != 0))
 					break;
 				if ((tp->queue - tp->pc) == PQ_CACHE) {
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@ -79,8 +79,10 @@ struct pagerops {
 #define	VM_PAGER_ERROR	4
 #define VM_PAGER_AGAIN	5

-#define	VM_PAGER_PUT_SYNC	0x1
-#define	VM_PAGER_PUT_INVAL	0x2
+#define	VM_PAGER_PUT_SYNC		0x0001
+#define	VM_PAGER_PUT_INVAL		0x0002
+#define VM_PAGER_IGNORE_CLEANCHK	0x0004
+#define VM_PAGER_CLUSTER_OK		0x0008

 #ifdef _KERNEL
 #ifdef MALLOC_DECLARE
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@ -1044,11 +1044,17 @@ vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
 	/*
 	 * pageouts are already clustered, use IO_ASYNC t o force a bawrite()
 	 * rather then a bdwrite() to prevent paging I/O from saturating 
-	 * the buffer cache.
+	 * the buffer cache.  Dummy-up the sequential heuristic to cause
+	 * large ranges to cluster.  If neither IO_SYNC nor IO_ASYNC is set,
+	 * the system decides how to cluster.
 	 */
 	ioflags = IO_VMIO;
-	ioflags |= (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) ? IO_SYNC: IO_ASYNC;
+	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
+		ioflags |= IO_SYNC;
+	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
+		ioflags |= IO_ASYNC;
 	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
+	ioflags |= IO_SEQMAX << IO_SEQSHIFT;

 	aiov.iov_base = (caddr_t) 0;
 	aiov.iov_len = maxsize;