From 0eaffce7b350a432506f61d1240b4021dfced858 Mon Sep 17 00:00:00 2001
From: scottl
Date: Tue, 30 Jul 2013 23:26:05 +0000
Subject: [PATCH] Create a knob, kern.ipc.sfreadahead, that allows one to tune
 the amount of readahead that sendfile() will do. Default remains the same.

Obtained from: Netflix
MFC after: 3 days
---
 sys/kern/kern_mutex.c    | 20 ++++++++++++++------
 sys/kern/subr_uio.c      |  8 +++++++-
 sys/kern/uipc_syscalls.c |  9 +++++++--
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index a613da8950ba..cd1ed7dbf4ea 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -218,13 +218,14 @@ __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
 	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
-	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
-	    file, line, NULL);
+	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
+	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
 
 	__mtx_lock(m, curthread, opts, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
 	    line);
-	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
+	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
+	    file, line);
 	curthread->td_locks++;
 }
 
@@ -271,9 +272,11 @@ __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
 	    m->lock_object.lo_name, file, line));
 	if (mtx_owned(m))
-		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+		    (opts & MTX_RECURSE) != 0,
 		    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
+	opts &= ~MTX_RECURSE;
 	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line, NULL);
 	__mtx_lock_spin(m, curthread, opts, file, line);
@@ -335,12 +338,14 @@ _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
 	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
 	    file, line));
 
-	if (mtx_owned(m) && (m->lock_object.lo_flags & LO_RECURSABLE) != 0) {
+	if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+	    (opts & MTX_RECURSE) != 0)) {
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		rval = 1;
 	} else
 		rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
+	opts &= ~MTX_RECURSE;
 
 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
 	if (rval) {
@@ -391,15 +396,18 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
 	m = mtxlock2mtx(c);
 
 	if (mtx_owned(m)) {
-		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
+		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
+		    (opts & MTX_RECURSE) != 0,
 		    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->lock_object.lo_name, file, line));
+		opts &= ~MTX_RECURSE;
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
 		return;
 	}
+	opts &= ~MTX_RECURSE;
 
 #ifdef HWPMC_HOOKS
 	PMC_SOFT_CALL( , , lock, failed);
diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c
index 1ee265c7dde9..efa483f9df94 100644
--- a/sys/kern/subr_uio.c
+++ b/sys/kern/subr_uio.c
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
 #include <vm/vm_map.h>
 #ifdef SOCKET_SEND_COW
 #include <vm/vm_object.h>
@@ -122,7 +123,12 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
 		if (uobject->backing_object != NULL)
 			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
 	}
-	vm_page_insert(kern_pg, uobject, upindex);
+	if (vm_page_insert(kern_pg, uobject, upindex)) {
+		VM_OBJECT_WUNLOCK(uobject);
+		VM_WAIT;
+		VM_OBJECT_WLOCK(uobject);
+		goto retry;
+	}
 	vm_page_dirty(kern_pg);
 	VM_OBJECT_WUNLOCK(uobject);
 	vm_map_lookup_done(map, entry);
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 46ceef2bc446..07e169e837f4 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -122,6 +122,7 @@ counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
 int nsfbufs;
 int nsfbufspeak;
 int nsfbufsused;
+static int sfreadahead = MAXPHYS / MAXBSIZE;
 
 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
     "Maximum number of sendfile(2) sf_bufs available");
@@ -129,6 +130,9 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
     "Number of sendfile(2) sf_bufs at peak usage");
 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
     "Number of sendfile(2) sf_bufs in use");
+SYSCTL_INT(_kern_ipc, OID_AUTO, sfreadahead, CTLFLAG_RW, &sfreadahead, 0,
+    "Number of sendfile(2) read-ahead MAXBSIZE blocks");
+
 
 static void
 sfstat_init(const void *unused)
@@ -2240,6 +2244,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
 			error = EBUSY;
 		else {
 			ssize_t resid;
+			int readahead = sfreadahead * MAXBSIZE;
 
 			/*
 			 * Ensure that our page is still around
@@ -2255,9 +2260,9 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
 			 * wrong, but is consistent with our original
 			 * implementation.
 			 */
-			error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
+			error = vn_rdwr(UIO_READ, vp, NULL, readahead,
 			    trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
-			    IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
+			    IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT),
 			    td->td_ucred, NOCRED, &resid, td);
 			VM_OBJECT_WLOCK(obj);
 			vm_page_io_finish(pg);
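
Usage note (illustrative, not part of the commit): the knob counts MAXBSIZE
blocks, so the byte readahead is sfreadahead * MAXBSIZE. It can be read and
set at runtime with sysctl(8), e.g. "sysctl kern.ipc.sfreadahead=4", or from
a program via sysctlbyname(3). A minimal userland sketch, assuming a FreeBSD
system with this patch applied:

	#include <sys/types.h>
	#include <sys/sysctl.h>

	#include <stdio.h>
	#include <stdlib.h>

	int
	main(void)
	{
		int blocks;
		size_t len = sizeof(blocks);

		/* Read the current sendfile(2) readahead, in MAXBSIZE blocks. */
		if (sysctlbyname("kern.ipc.sfreadahead", &blocks, &len,
		    NULL, 0) == -1) {
			perror("sysctlbyname");
			exit(1);
		}
		printf("kern.ipc.sfreadahead = %d\n", blocks);

		/*
		 * Double it (requires root); the new value takes effect
		 * immediately since the sysctl is CTLFLAG_RW.
		 */
		blocks *= 2;
		if (sysctlbyname("kern.ipc.sfreadahead", NULL, NULL,
		    &blocks, sizeof(blocks)) == -1)
			perror("sysctlbyname");
		return (0);
	}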
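
Sizing note (illustrative): kern_sendfile() now issues each UIO_NOCOPY read
for readahead = sfreadahead * MAXBSIZE bytes and hands the filesystem the
sequential hint (readahead / bsize) << IO_SEQSHIFT. Assuming the stock
MAXPHYS of 128 KiB and MAXBSIZE of 64 KiB, the default is 128 KiB / 64 KiB =
2 blocks, so each read requests 128 KiB; on a filesystem with a 32 KiB block
size the encoded sequential count is 128 KiB / 32 KiB = 4, where the old
hard-coded MAXBSIZE read would have requested 64 KiB with a count of 2.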
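
The kern_mutex.c hunks (which the commit message does not mention) let a
caller pass MTX_RECURSE in the lock flags to recurse on a mutex that was not
initialized as recursive; the flag is masked off before the remaining bits
reach WITNESS and the lock logging. A hypothetical in-kernel sketch, where
example_mtx and example() are made up for illustration:

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>

	static struct mtx example_mtx;	/* hypothetical lock */

	static void
	example(void)
	{
		mtx_init(&example_mtx, "example", NULL, MTX_DEF);

		mtx_lock(&example_mtx);
		/*
		 * Without MTX_RECURSE in either mtx_init() or the lock
		 * call, recursing here would fire the "recursed on
		 * non-recursive mutex" assertion.  Passing it at lock
		 * time permits the recursion for this acquisition only.
		 */
		mtx_lock_flags(&example_mtx, MTX_RECURSE);
		mtx_unlock(&example_mtx);
		mtx_unlock(&example_mtx);

		mtx_destroy(&example_mtx);
	}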
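
In the subr_uio.c hunk, vm_page_insert() is now checked for failure: if the
insert cannot complete (it may fail under memory pressure), the object lock
is dropped, VM_WAIT sleeps until the page daemon has reclaimed memory, and
the operation restarts from the retry label earlier in vm_pgmoveco()
(outside the hunk shown).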