The getpages VOP was a good stab at getting scatter/gather I/O without
too much kernel copying, but it is not the right way to do it, and it is
in the way of straightening out the buffer cache.

The right way is to pass the VM page array down through the struct
bio to the disk device driver and DMA directly into/out of
physical memory.  Once the VM/buf thing is sorted out it is next on
the list.

Retire most of the vnode method ffs_getpages().  It is not clear whether what
is left shouldn't be in the default implementation, which we now fall back to.

Retire specfs's spec_getpages() as well, as it has no users now.
This commit is contained in:
Poul-Henning Kamp 2004-09-19 08:14:55 +00:00
parent 710df706de
commit d705e025d0
2 changed files with 5 additions and 293 deletions

View File

@ -55,7 +55,6 @@
static int spec_advlock(struct vop_advlock_args *);
static int spec_close(struct vop_close_args *);
static int spec_fsync(struct vop_fsync_args *);
static int spec_getpages(struct vop_getpages_args *);
static int spec_ioctl(struct vop_ioctl_args *);
static int spec_kqfilter(struct vop_kqfilter_args *);
static int spec_open(struct vop_open_args *);
@ -74,7 +73,6 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
{ &vop_close_desc, (vop_t *) spec_close },
{ &vop_create_desc, (vop_t *) vop_panic },
{ &vop_fsync_desc, (vop_t *) spec_fsync },
{ &vop_getpages_desc, (vop_t *) spec_getpages },
{ &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount },
{ &vop_ioctl_desc, (vop_t *) spec_ioctl },
{ &vop_kqfilter_desc, (vop_t *) spec_kqfilter },
@ -640,182 +638,3 @@ spec_advlock(ap)
return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
}
/*
* spec_getpages: the getpages vnode operation registered in
* spec_vnodeop_entries.
*
* Reads ap->a_count bytes, starting at the offset of the first page in
* ap->a_m plus ap->a_offset, into the pages of ap->a_m.  The pages are
* mapped into the KVA of a buffer obtained from getpbuf(), the read is
* issued through spec_xstrategy(), and afterwards every page is marked
* valid, partially valid or invalid according to the number of bytes
* actually transferred.
*
* Pages other than the requested one (index ap->a_reqpage) are either
* handed back to the page queues and woken up, or freed.  The requested
* page is left for the caller.
*
* Returns VM_PAGER_OK if the requested page ended up with any valid bits,
* VM_PAGER_ERROR otherwise (after printing diagnostics).
*/
static int
spec_getpages(ap)
struct vop_getpages_args *ap;
{
vm_offset_t kva;
int error;
int i, pcount, size, s;
daddr_t blkno;
struct buf *bp;
vm_page_t m;
vm_ooffset_t offset;
int toff, nextoff, nread;
struct vnode *vp = ap->a_vp;
int blksiz;
int gotreqpage;
GIANT_REQUIRED;
error = 0;
/* Number of pages needed to cover a_count bytes, rounded up. */
pcount = round_page(ap->a_count) / PAGE_SIZE;
/*
* Calculate the offset of the transfer and do a sanity check.
* FreeBSD currently only supports an 8 TB range due to b_blkno
* being in DEV_BSIZE ( usually 512 ) byte chunks on call to
* VOP_STRATEGY. XXX
*
* NOTE(review): no explicit range check is visible in this function;
* the offset is simply converted to a block number with btodb().
*/
offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
blkno = btodb(offset);
/*
* Round up physical size for real devices. We cannot round using
* v_mount's block size data because v_mount has nothing to do with
* the device. i.e. it's usually '/dev'. We need the physical block
* size for the device itself.
*
* We can't use v_rdev->si_mountpoint because it only exists when the
* block device is mounted. However, we can use v_rdev.
*/
if (vn_isdisk(vp, NULL))
blksiz = vp->v_rdev->si_bsize_phys;
else
blksiz = DEV_BSIZE;
/*
* Round the request up to a multiple of blksiz.  The mask arithmetic
* is only correct when blksiz is a power of two.
*/
size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
bp = getpbuf(NULL);
kva = (vm_offset_t)bp->b_data;
/*
* Map the pages to be read into the kva.
*/
pmap_qenter(kva, ap->a_m, pcount);
/* Build a minimal buffer header. */
bp->b_iocmd = BIO_READ;
bp->b_iodone = bdone;
KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
/* Take a reference on the current thread's credentials for each slot. */
bp->b_rcred = crhold(curthread->td_ucred);
bp->b_wcred = crhold(curthread->td_ucred);
bp->b_iooffset = offset;
bp->b_blkno = blkno;
bp->b_lblkno = blkno;
pbgetvp(ap->a_vp, bp);
bp->b_bcount = size;
bp->b_bufsize = size;
bp->b_resid = 0;
/*
* Account this transfer in the global runningbufspace counter.  It is
* not decremented anywhere in this function; presumably the I/O
* completion path does that -- TODO confirm.
*/
bp->b_runningbufspace = bp->b_bufsize;
runningbufspace += bp->b_runningbufspace;
/* Pager statistics: one pagein operation covering pcount pages. */
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
/* Do the input. */
spec_xstrategy(bp->b_vp, bp);
/* Presumably sleeps until bdone() signals completion -- TODO confirm. */
s = splbio();
bwait(bp, PVM, "spread");
splx(s);
/* An error flag with no specific error code is reported as EIO. */
if ((bp->b_ioflags & BIO_ERROR) != 0) {
if (bp->b_error)
error = bp->b_error;
else
error = EIO;
}
/*
* nread is the number of bytes actually transferred.  If it falls short
* of the request, zero the unread tail through the kva mapping while
* the pages are still mapped.
*/
nread = size - bp->b_resid;
if (nread < ap->a_count) {
bzero((caddr_t)kva + nread,
ap->a_count - nread);
}
pmap_qremove(kva, pcount);
gotreqpage = 0;
/*
* While the page is busy, its object field is immutable.
*/
VM_OBJECT_LOCK(ap->a_m[ap->a_reqpage]->object);
vm_page_lock_queues();
/*
* Walk the pages; [toff, nextoff) is the byte range of page i within
* the transfer.
*/
for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
nextoff = toff + PAGE_SIZE;
m = ap->a_m[i];
if (nextoff <= nread) {
/* The whole page was read. */
m->valid = VM_PAGE_BITS_ALL;
vm_page_undirty(m);
} else if (toff < nread) {
/*
* Since this is a VM request, we have to supply the
* unaligned offset to allow vm_page_set_validclean()
* to zero sub-DEV_BSIZE'd portions of the page.
*/
vm_page_set_validclean(m, 0, nread - toff);
} else {
/* Nothing was read into this page. */
m->valid = 0;
vm_page_undirty(m);
}
if (i != ap->a_reqpage) {
/*
* Just in case someone was asking for this page we
* now tell them that it is ok to use.
*
* A page is kept only if it has valid data and either
* there was no error or it is fully valid; every other
* non-requested page is freed.
*/
if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
if (m->valid) {
if (m->flags & PG_WANTED) {
vm_page_activate(m);
} else {
vm_page_deactivate(m);
}
vm_page_wakeup(m);
} else {
vm_page_free(m);
}
} else {
vm_page_free(m);
}
} else if (m->valid) {
/*
* The requested page counts as obtained as soon as it has
* any valid bits; 'error' is not consulted here.
*/
gotreqpage = 1;
/*
* Since this is a VM request, we need to make the
* entire page presentable by zeroing invalid sections.
*/
if (m->valid != VM_PAGE_BITS_ALL)
vm_page_zero_invalid(m, FALSE);
}
}
vm_page_unlock_queues();
if (!gotreqpage) {
/* The requested page got no valid data: report and fail. */
m = ap->a_m[ap->a_reqpage];
printf(
"spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
devtoname(bp->b_dev), error, bp, bp->b_vp);
printf(
" size: %d, resid: %ld, a_count: %d, valid: 0x%lx\n",
size, bp->b_resid, ap->a_count, (u_long)m->valid);
printf(
" nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
nread, ap->a_reqpage, (u_long)m->pindex, pcount);
VM_OBJECT_UNLOCK(m->object);
/*
* Free the buffer header back to the swap buffer pool.
*/
relpbuf(bp, NULL);
return VM_PAGER_ERROR;
}
VM_OBJECT_UNLOCK(ap->a_m[ap->a_reqpage]->object);
/*
* Free the buffer header back to the swap buffer pool.
*/
relpbuf(bp, NULL);
return VM_PAGER_OK;
}

View File

@ -746,20 +746,9 @@ static int
ffs_getpages(ap)
struct vop_getpages_args *ap;
{
off_t foff, physoffset;
int i, size, bsize;
struct vnode *dp, *vp;
vm_object_t obj;
vm_pindex_t pindex;
int i;
vm_page_t mreq;
int bbackwards, bforwards;
int pbackwards, pforwards;
int firstpage;
ufs2_daddr_t reqblkno, reqlblkno;
int poff;
int pcount;
int rtval;
int pagesperblock;
GIANT_REQUIRED;
@ -787,109 +776,13 @@ ffs_getpages(ap)
return VM_PAGER_OK;
}
VM_OBJECT_UNLOCK(mreq->object);
vp = ap->a_vp;
obj = vp->v_object;
bsize = vp->v_mount->mnt_stat.f_iosize;
pindex = mreq->pindex;
foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
if (bsize < PAGE_SIZE)
return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
ap->a_count,
ap->a_reqpage);
/*
* foff is the file offset of the required page
* reqlblkno is the logical block that contains the page
* poff is the index of the page into the logical block
*/
reqlblkno = foff / bsize;
poff = (foff % bsize) / PAGE_SIZE;
dp = VTOI(vp)->i_devvp;
if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
|| (reqblkno == -1)) {
VM_OBJECT_LOCK(obj);
vm_page_lock_queues();
for(i = 0; i < pcount; i++) {
if (i != ap->a_reqpage)
vm_page_free(ap->a_m[i]);
}
vm_page_unlock_queues();
if (reqblkno == -1) {
if ((mreq->flags & PG_ZERO) == 0)
pmap_zero_page(mreq);
vm_page_undirty(mreq);
mreq->valid = VM_PAGE_BITS_ALL;
VM_OBJECT_UNLOCK(obj);
return VM_PAGER_OK;
} else {
VM_OBJECT_UNLOCK(obj);
return VM_PAGER_ERROR;
}
}
physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
pagesperblock = bsize / PAGE_SIZE;
/*
* find the first page that is contiguous...
* note that pbackwards is the number of pages that are contiguous
* backwards.
*/
firstpage = 0;
if (ap->a_count) {
pbackwards = poff + bbackwards * pagesperblock;
if (ap->a_reqpage > pbackwards) {
firstpage = ap->a_reqpage - pbackwards;
VM_OBJECT_LOCK(obj);
vm_page_lock_queues();
for(i=0;i<firstpage;i++)
vm_page_free(ap->a_m[i]);
vm_page_unlock_queues();
VM_OBJECT_UNLOCK(obj);
}
/*
* pforwards is the number of pages that are contiguous
* after the current page.
*/
pforwards = (pagesperblock - (poff + 1)) +
bforwards * pagesperblock;
if (pforwards < (pcount - (ap->a_reqpage + 1))) {
VM_OBJECT_LOCK(obj);
vm_page_lock_queues();
for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
vm_page_free(ap->a_m[i]);
vm_page_unlock_queues();
VM_OBJECT_UNLOCK(obj);
pcount = ap->a_reqpage + pforwards + 1;
}
/*
* number of pages for I/O corrected for the non-contig pages at
* the beginning of the array.
*/
pcount -= firstpage;
}
/*
* calculate the size of the transfer
*/
size = pcount * PAGE_SIZE;
if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
obj->un_pager.vnp.vnp_size)
size = obj->un_pager.vnp.vnp_size -
IDX_TO_OFF(ap->a_m[firstpage]->pindex);
physoffset -= foff;
rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
(ap->a_reqpage - firstpage), physoffset);
return (rtval);
return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
ap->a_count,
ap->a_reqpage);
}
/*
* Extended attribute area reading.
*/