From ab8f55b9fdcbb5720b8520f3a61cbc3cab16cb66 Mon Sep 17 00:00:00 2001 From: Scott Long Date: Tue, 7 May 2013 08:16:21 +0000 Subject: [PATCH] Add a sysctl vfs.read_min to complement the existing vfs.read_max. It defaults to 1, meaning that it's off. When read-ahead is enabled on a file, the vfs cluster code deliberately breaks a read into 2 I/O transactions; one to satisfy the actual read, and one to perform read-ahead. This makes sense in low-latency circumstances, but often produces unbalanced i/o transactions that penalize disks. By setting vfs.read_min, we can tell the algorithm to fetch a larger transaction than what we asked for, achieving the same effect as the read-ahead but without the doubled, unbalanced transaction and the slightly lower latency. This significantly helps our workloads with video streaming. Submitted by: emax Reviewed by: kib Obtained from: Netflix --- sys/kern/vfs_cluster.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 91a044319185..d619960cbb89 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -76,6 +76,10 @@ static int read_max = 64; SYSCTL_INT(_vfs, OID_AUTO, read_max, CTLFLAG_RW, &read_max, 0, "Cluster read-ahead max block count"); +static int read_min = 1; +SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0, + "Cluster read min block count"); + /* Page expended to mark partially backed buffers */ extern vm_page_t bogus_page; @@ -166,12 +170,20 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size, } else { off_t firstread = bp->b_offset; int nblks; + long minread; KASSERT(bp->b_offset != NOOFFSET, ("cluster_read: no buffer offset")); ncontig = 0; + /* + * Adjust totread if needed + */ + minread = read_min * size; + if (minread > totread) + totread = minread; + /* * Compute the total number of blocks that we should read * synchronously.