Add a sysctl vfs.read_min to complement the existing vfs.read_max. It

defaults to 1, meaning that it's off. When read-ahead is enabled on a file, the vfs cluster code deliberately breaks a read into 2 I/O transactions: one to satisfy the actual read, and one to perform read-ahead. This makes sense in low-latency circumstances, but often produces unbalanced I/O transactions that penalize disks. By setting vfs.read_min, we can tell the algorithm to fetch a larger transaction than what we asked for, achieving the same effect as the read-ahead but without the doubled, unbalanced transaction and the slightly lower latency. This significantly helps our workloads with video streaming. Submitted by: emax Reviewed by: kib Obtained from: Netflix
2013-05-07 08:16:21 +00:00 · 2013-05-07 08:16:21 +00:00 · ab8f55b9fd
commit ab8f55b9fd
parent 7dcb2bea01
1 changed files with 12 additions and 0 deletions
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@ -76,6 +76,10 @@ static int read_max = 64;
 SYSCTL_INT(_vfs, OID_AUTO, read_max, CTLFLAG_RW, &read_max, 0,
    "Cluster read-ahead max block count");

+static int read_min = 1;
+SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0,
+    "Cluster read min block count");
+
 /* Page expended to mark partially backed buffers */
 extern vm_page_t	bogus_page;

@ -166,12 +170,20 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
 	} else {
 		off_t firstread = bp->b_offset;
 		int nblks;
+		long minread;

 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("cluster_read: no buffer offset"));

 		ncontig = 0;

+		/*
+		 * Adjust totread if needed
+		 */
+		minread = read_min * size;
+		if (minread > totread)
+			totread = minread;
+
 		/*
 		 * Compute the total number of blocks that we should read
 		 * synchronously.