Merge from projects/sendfile:

o Provide a new VOP_GETPAGES_ASYNC(), which works like VOP_GETPAGES(), but
  doesn't sleep. It returns immediately and will execute the I/O done handler
  function that must be supplied as an argument.
o Provide VOP_GETPAGES_ASYNC() for the FFS, which uses vnode_pager.
o Extend pagertab to support pgo_getpages_async method, and implement this
  method for vnode_pager.

Reviewed by:	kib
Tested by:	pho
Sponsored by:	Netflix
Sponsored by:	Nginx, Inc.
This commit is contained in:
Gleb Smirnoff 2014-11-23 12:01:52 +00:00
parent 03fcebe6a2
commit 90effb2341
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=274914
9 changed files with 217 additions and 52 deletions

View File

@ -83,6 +83,7 @@ static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
/*
* This vnode table stores what we want to do if the filesystem doesn't
@ -111,6 +112,7 @@ struct vop_vector default_vnodeops = {
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
.vop_getpages = vop_stdgetpages,
.vop_getpages_async = vop_stdgetpages_async,
.vop_getwritemount = vop_stdgetwritemount,
.vop_inactive = VOP_NULL,
.vop_ioctl = VOP_ENOTTY,
@ -725,7 +727,17 @@ vop_stdgetpages(ap)
{
return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
ap->a_count, ap->a_reqpage);
ap->a_count, ap->a_reqpage, NULL, NULL);
}
/*
 * Default VOP_GETPAGES_ASYNC() implementation: emulate the asynchronous
 * interface on top of the synchronous VOP_GETPAGES().  The pagein has
 * already completed by the time the caller-supplied completion handler
 * is invoked with the result.
 */
static int
vop_stdgetpages_async(struct vop_getpages_async_args *ap)
{
	int rv;

	rv = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage);
	ap->a_iodone(ap->a_arg, ap->a_m, ap->a_reqpage, rv);
	return (rv);
}
int

View File

@ -476,6 +476,19 @@ vop_getpages {
};
#
# getpages_async: like getpages, but may return before the pagein has
# completed; the supplied iodone callback is invoked with arg when the
# I/O finishes.
#
%% getpages_async vp L L L
vop_getpages_async {
IN struct vnode *vp;
IN vm_page_t *m;
IN int count;
IN int reqpage;
IN vm_ooffset_t offset;
IN vop_getpages_iodone_t *iodone;
IN void *arg;
};
%% putpages vp L L L
vop_putpages {

View File

@ -107,7 +107,6 @@ struct buf {
off_t b_offset; /* Offset into file. */
TAILQ_ENTRY(buf) b_bobufs; /* (V) Buffer's associated vnode. */
uint32_t b_vflags; /* (V) BV_* flags */
TAILQ_ENTRY(buf) b_freelist; /* (Q) Free list position inactive. */
unsigned short b_qindex; /* (Q) buffer queue index */
uint32_t b_flags; /* B_* flags. */
b_xflags_t b_xflags; /* extra flags */
@ -124,9 +123,15 @@ struct buf {
struct ucred *b_rcred; /* Read credentials reference. */
struct ucred *b_wcred; /* Write credentials reference. */
void *b_saveaddr; /* Original b_addr for physio. */
union pager_info {
int pg_reqpage;
} b_pager;
union {
TAILQ_ENTRY(buf) bu_freelist; /* (Q) */
struct {
void (*pg_iodone)(void *, vm_page_t *, int, int);
int pg_reqpage;
} bu_pager;
} b_union;
#define b_freelist b_union.bu_freelist
#define b_pager b_union.bu_pager
union cluster_info {
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;

View File

@ -574,6 +574,7 @@ vn_canvmio(struct vnode *vp)
/*
* Finally, include the default set of vnode operations.
*/
typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
#include "vnode_if.h"
/* vn_open_flags */

View File

@ -124,6 +124,7 @@ struct vop_vector ffs_vnodeops1 = {
.vop_default = &ufs_vnodeops,
.vop_fsync = ffs_fsync,
.vop_getpages = vnode_pager_local_getpages,
.vop_getpages_async = vnode_pager_local_getpages_async,
.vop_lock1 = ffs_lock,
.vop_read = ffs_read,
.vop_reallocblks = ffs_reallocblks,
@ -143,6 +144,7 @@ struct vop_vector ffs_vnodeops2 = {
.vop_default = &ufs_vnodeops,
.vop_fsync = ffs_fsync,
.vop_getpages = vnode_pager_local_getpages,
.vop_getpages_async = vnode_pager_local_getpages_async,
.vop_lock1 = ffs_lock,
.vop_read = ffs_read,
.vop_reallocblks = ffs_reallocblks,

View File

@ -361,6 +361,8 @@ static vm_object_t
vm_prot_t prot, vm_ooffset_t offset, struct ucred *);
static void swap_pager_dealloc(vm_object_t object);
static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int);
static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int,
pgo_getpages_iodone_t, void *);
static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
static boolean_t
swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after);
@ -373,6 +375,7 @@ struct pagerops swappagerops = {
.pgo_alloc = swap_pager_alloc, /* allocate an OBJT_SWAP object */
.pgo_dealloc = swap_pager_dealloc, /* deallocate an OBJT_SWAP object */
.pgo_getpages = swap_pager_getpages, /* pagein */
.pgo_getpages_async = swap_pager_getpages_async, /* pagein (async) */
.pgo_putpages = swap_pager_putpages, /* pageout */
.pgo_haspage = swap_pager_haspage, /* get backing store status for page */
.pgo_pageunswapped = swap_pager_unswapped, /* remove swap related to page */
@ -1256,6 +1259,39 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
*/
}
/*
* swap_pager_getpages_async():
*
* Right now this is emulation of asynchronous operation on top of
* swap_pager_getpages().
*/
/*
 * Asynchronous pagein entry point for the swap pager.  The pagein is
 * actually performed synchronously via swap_pager_getpages(); the
 * result is then translated to an errno value and reported through the
 * caller-supplied completion callback.  The object write lock is
 * dropped while the callback runs and reacquired before returning.
 */
static int
swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count,
    int reqpage, pgo_getpages_iodone_t iodone, void *arg)
{
	int err, rv;

	rv = swap_pager_getpages(object, m, count, reqpage);
	VM_OBJECT_WUNLOCK(object);
	if (rv == VM_PAGER_OK)
		err = 0;
	else if (rv == VM_PAGER_ERROR)
		err = EIO;
	else if (rv == VM_PAGER_FAIL)
		err = EINVAL;
	else
		panic("unhandled swap_pager_getpages() error %d\n", rv);
	(iodone)(arg, m, count, err);
	VM_OBJECT_WLOCK(object);
	return (rv);
}
/*
* swap_pager_putpages:
*

View File

@ -51,6 +51,9 @@ typedef vm_object_t pgo_alloc_t(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t,
struct ucred *);
typedef void pgo_dealloc_t(vm_object_t);
typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int);
typedef void pgo_getpages_iodone_t(void *, vm_page_t *, int, int);
typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int,
pgo_getpages_iodone_t, void *);
typedef void pgo_putpages_t(vm_object_t, vm_page_t *, int, int, int *);
typedef boolean_t pgo_haspage_t(vm_object_t, vm_pindex_t, int *, int *);
typedef void pgo_pageunswapped_t(vm_page_t);
@ -60,6 +63,7 @@ struct pagerops {
pgo_alloc_t *pgo_alloc; /* Allocate pager. */
pgo_dealloc_t *pgo_dealloc; /* Disassociate. */
pgo_getpages_t *pgo_getpages; /* Get (read) page. */
pgo_getpages_async_t *pgo_getpages_async; /* Get page asyncly. */
pgo_putpages_t *pgo_putpages; /* Put (write) page. */
pgo_haspage_t *pgo_haspage; /* Query page. */
pgo_pageunswapped_t *pgo_pageunswapped;
@ -103,6 +107,8 @@ vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t,
void vm_pager_bufferinit(void);
void vm_pager_deallocate(vm_object_t);
static __inline int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int);
static inline int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int,
int, pgo_getpages_iodone_t, void *);
static __inline boolean_t vm_pager_has_page(vm_object_t, vm_pindex_t, int *, int *);
void vm_pager_init(void);
vm_object_t vm_pager_object_lookup(struct pagerlst *, void *);
@ -133,6 +139,16 @@ vm_pager_get_pages(
return (r);
}
/*
 * Dispatch an asynchronous pagein request to the pager-type-specific
 * pgo_getpages_async method.  The object write lock must be held on
 * entry; the pager method may drop and reacquire it internally.
 */
static inline int
vm_pager_get_pages_async(vm_object_t object, vm_page_t *m, int count,
    int reqpage, pgo_getpages_iodone_t iodone, void *arg)
{
	pgo_getpages_async_t *f;

	VM_OBJECT_ASSERT_WLOCKED(object);
	f = pagertab[object->type]->pgo_getpages_async;
	return (f(object, m, count, reqpage, iodone, arg));
}
static __inline void
vm_pager_put_pages(
vm_object_t object,

View File

@ -82,16 +82,23 @@ static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
static int vnode_pager_input_old(vm_object_t object, vm_page_t m);
static void vnode_pager_dealloc(vm_object_t);
static int vnode_pager_local_getpages0(struct vnode *, vm_page_t *, int, int,
vop_getpages_iodone_t, void *);
static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int,
vop_getpages_iodone_t, void *);
static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, int, int *);
static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
static vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
vm_ooffset_t, struct ucred *cred);
static int vnode_pager_generic_getpages_done(struct buf *);
static void vnode_pager_generic_getpages_done_async(struct buf *);
struct pagerops vnodepagerops = {
.pgo_alloc = vnode_pager_alloc,
.pgo_dealloc = vnode_pager_dealloc,
.pgo_getpages = vnode_pager_getpages,
.pgo_getpages_async = vnode_pager_getpages_async,
.pgo_putpages = vnode_pager_putpages,
.pgo_haspage = vnode_pager_haspage,
};
@ -664,16 +671,51 @@ vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
return rtval;
}
/*
 * pgo_getpages_async method for vnode-backed objects: hand the request
 * to the filesystem via VOP_GETPAGES_ASYNC().  The object write lock is
 * dropped around the VOP call and reacquired before returning.
 */
static int
vnode_pager_getpages_async(vm_object_t object, vm_page_t *m, int count,
int reqpage, vop_getpages_iodone_t iodone, void *arg)
{
struct vnode *vp;
int rtval;
vp = object->handle;
VM_OBJECT_WUNLOCK(object);
/* The VOP layer takes a byte count, not a page count. */
rtval = VOP_GETPAGES_ASYNC(vp, m, count * PAGE_SIZE, reqpage, 0,
iodone, arg);
/* Filesystems reached through this pager must implement the VOP. */
KASSERT(rtval != EOPNOTSUPP,
("vnode_pager: FS getpages_async not implemented\n"));
VM_OBJECT_WLOCK(object);
return (rtval);
}
/*
* The implementation of VOP_GETPAGES() for local filesystems, where
* partially valid pages can only occur at the end of file.
* The implementation of VOP_GETPAGES() and VOP_GETPAGES_ASYNC() for
* local filesystems, where partially valid pages can only occur at
* the end of file.
*/
/*
 * VOP_GETPAGES() for local filesystems: forward to the common helper
 * with no completion callback (synchronous operation).
 */
int
vnode_pager_local_getpages(struct vop_getpages_args *ap)
{
return (vnode_pager_local_getpages0(ap->a_vp, ap->a_m, ap->a_count,
ap->a_reqpage, NULL, NULL));
}
/*
 * VOP_GETPAGES_ASYNC() for local filesystems: forward to the common
 * helper, passing through the caller's completion callback and its
 * opaque argument.
 */
int
vnode_pager_local_getpages_async(struct vop_getpages_async_args *ap)
{
return (vnode_pager_local_getpages0(ap->a_vp, ap->a_m, ap->a_count,
ap->a_reqpage, ap->a_iodone, ap->a_arg));
}
static int
vnode_pager_local_getpages0(struct vnode *vp, vm_page_t *m, int bytecount,
int reqpage, vop_getpages_iodone_t iodone, void *arg)
{
vm_page_t mreq;
mreq = ap->a_m[ap->a_reqpage];
mreq = m[reqpage];
/*
* Since the caller has busied the requested page, that page's valid
@ -688,13 +730,15 @@ vnode_pager_local_getpages(struct vop_getpages_args *ap)
* pages, since no i/o is done to read its content.
*/
if (mreq->valid != 0) {
vm_pager_free_nonreq(mreq->object, ap->a_m, ap->a_reqpage,
round_page(ap->a_count) / PAGE_SIZE);
vm_pager_free_nonreq(mreq->object, m, reqpage,
round_page(bytecount) / PAGE_SIZE);
if (iodone != NULL)
iodone(arg, m, reqpage, 0);
return (VM_PAGER_OK);
}
return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
ap->a_count, ap->a_reqpage));
return (vnode_pager_generic_getpages(vp, m, bytecount, reqpage,
iodone, arg));
}
/*
@ -703,11 +747,10 @@ vnode_pager_local_getpages(struct vop_getpages_args *ap)
*/
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
int reqpage)
int reqpage, vop_getpages_iodone_t iodone, void *arg)
{
vm_object_t object;
vm_offset_t kva;
off_t foff, tfoff, nextoff;
off_t foff;
int i, j, size, bsize, first;
daddr_t firstaddr, reqblock;
struct bufobj *bo;
@ -899,7 +942,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
}
bp = getpbuf(&vnode_pbuf_freecnt);
kva = (vm_offset_t)bp->b_data;
bp->b_kvaalloc = bp->b_data;
/*
* and map the pages to be read into the kva, if the filesystem
@ -911,15 +954,11 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
bp->b_kvabase = unmapped_buf;
bp->b_offset = 0;
bp->b_flags |= B_UNMAPPED;
bp->b_npages = count;
for (i = 0; i < count; i++)
bp->b_pages[i] = m[i];
} else
pmap_qenter(kva, m, count);
pmap_qenter((vm_offset_t)bp->b_kvaalloc, m, count);
/* build a minimal buffer header */
bp->b_iocmd = BIO_READ;
bp->b_iodone = bdone;
KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
bp->b_rcred = crhold(curthread->td_ucred);
@ -930,6 +969,10 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
bp->b_bcount = size;
bp->b_bufsize = size;
bp->b_runningbufspace = bp->b_bufsize;
for (i = 0; i < count; i++)
bp->b_pages[i] = m[i];
bp->b_npages = count;
bp->b_pager.pg_reqpage = reqpage;
atomic_add_long(&runningbufspace, bp->b_runningbufspace);
PCPU_INC(cnt.v_vnodein);
@ -937,43 +980,79 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
/* do the input */
bp->b_iooffset = dbtob(bp->b_blkno);
bstrategy(bp);
bwait(bp, PVM, "vnread");
if ((bp->b_ioflags & BIO_ERROR) != 0)
error = EIO;
if (error == 0 && size != count * PAGE_SIZE) {
if ((bp->b_flags & B_UNMAPPED) != 0) {
bp->b_flags &= ~B_UNMAPPED;
pmap_qenter(kva, m, count);
}
bzero((caddr_t)kva + size, PAGE_SIZE * count - size);
}
if ((bp->b_flags & B_UNMAPPED) == 0)
pmap_qremove(kva, count);
if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) {
bp->b_data = (caddr_t)kva;
bp->b_kvabase = (caddr_t)kva;
bp->b_flags &= ~B_UNMAPPED;
for (i = 0; i < count; i++)
if (iodone != NULL) { /* async */
bp->b_pager.pg_iodone = iodone;
bp->b_caller1 = arg;
bp->b_iodone = vnode_pager_generic_getpages_done_async;
bp->b_flags |= B_ASYNC;
BUF_KERNPROC(bp);
bstrategy(bp);
/* Good bye! */
} else {
bp->b_iodone = bdone;
bstrategy(bp);
bwait(bp, PVM, "vnread");
error = vnode_pager_generic_getpages_done(bp);
for (int i = 0; i < bp->b_npages; i++)
bp->b_pages[i] = NULL;
bp->b_vp = NULL;
pbrelbo(bp);
relpbuf(bp, &vnode_pbuf_freecnt);
}
/*
* free the buffer header back to the swap buffer pool
*/
return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
}
/*
 * Completion handler for asynchronous pageins started by
 * vnode_pager_generic_getpages() (installed as bp->b_iodone): finish
 * page post-processing, notify the requester through the saved
 * pg_iodone callback, then release the pbuf.
 */
static void
vnode_pager_generic_getpages_done_async(struct buf *bp)
{
int error;
error = vnode_pager_generic_getpages_done(bp);
/* Report completion before tearing down the buffer. */
bp->b_pager.pg_iodone(bp->b_caller1, bp->b_pages,
bp->b_pager.pg_reqpage, error);
/* NOTE(review): page pointers are cleared before the pbuf is recycled,
 * presumably so no stale references survive reuse -- confirm. */
for (int i = 0; i < bp->b_npages; i++)
bp->b_pages[i] = NULL;
bp->b_vp = NULL;
pbrelbo(bp);
relpbuf(bp, &vnode_pbuf_freecnt);
}
static int
vnode_pager_generic_getpages_done(struct buf *bp)
{
vm_object_t object;
off_t tfoff, nextoff;
int i, error;
error = (bp->b_ioflags & BIO_ERROR) != 0 ? EIO : 0;
object = bp->b_vp->v_object;
if (error == 0 && bp->b_bcount != bp->b_npages * PAGE_SIZE) {
if ((bp->b_flags & B_UNMAPPED) != 0) {
bp->b_flags &= ~B_UNMAPPED;
pmap_qenter((vm_offset_t)bp->b_kvaalloc, bp->b_pages,
bp->b_npages);
}
bzero(bp->b_kvaalloc + bp->b_bcount,
PAGE_SIZE * bp->b_npages - bp->b_bcount);
}
if ((bp->b_flags & B_UNMAPPED) == 0)
pmap_qremove((vm_offset_t)bp->b_kvaalloc, bp->b_npages);
if ((bp->b_vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) {
bp->b_data = bp->b_kvaalloc;
bp->b_kvabase = bp->b_kvaalloc;
bp->b_flags &= ~B_UNMAPPED;
}
VM_OBJECT_WLOCK(object);
for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
for (i = 0, tfoff = IDX_TO_OFF(bp->b_pages[0]->pindex);
i < bp->b_npages; i++, tfoff = nextoff) {
vm_page_t mt;
nextoff = tfoff + PAGE_SIZE;
mt = m[i];
mt = bp->b_pages[i];
if (nextoff <= object->un_pager.vnp.vnp_size) {
/*
@ -999,14 +1078,14 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
("%s: page %p is dirty", __func__, mt));
}
if (i != reqpage)
if (i != bp->b_pager.pg_reqpage)
vm_page_readahead_finish(mt);
}
VM_OBJECT_WUNLOCK(object);
if (error) {
printf("vnode_pager_getpages: I/O read error\n");
}
return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
if (error != 0)
printf("%s: I/O read error %d\n", __func__, error);
return (error);
}
/*

View File

@ -41,11 +41,12 @@
#ifdef _KERNEL
int vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m,
int count, int reqpage);
int count, int reqpage, vop_getpages_iodone_t iodone, void *arg);
int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m,
int count, boolean_t sync,
int *rtvals);
int vnode_pager_local_getpages(struct vop_getpages_args *ap);
int vnode_pager_local_getpages_async(struct vop_getpages_async_args *ap);
void vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
vm_offset_t end);