Drop the object lock in vfs_bio and cluster where it is now safe to do so.

Recent changes to busy/valid/dirty have enabled page-based synchronization,
and the object lock is no longer required in many cases.

Reviewed by:	kib
Sponsored by:	Netflix, Intel
Differential Revision:	https://reviews.freebsd.org/D21597
Author:	Jeff Roberson
Date:	2019-10-29 20:37:59 +00:00
Commit:	6ee653cfeb (parent 5a84f74f5b)
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=354155

2 changed files, 59 insertions(+), 107 deletions(-)
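The recurring pattern in both files: loops that previously ran under
VM_OBJECT_WLOCK() now rely on each page's busy state, and take the object
lock lazily, in read mode, only when a bogus_page placeholder must be
resolved back to the real page via vm_page_lookup(). A minimal sketch of
that pattern, condensed from the vfs_vmio_iodone() and vfs_unbusy_pages()
hunks below (surrounding declarations assumed; not the literal committed
code):

	/* Lock the object shared only once a bogus_page is seen. */
	bogus = false;
	for (i = 0; i < bp->b_npages; i++) {
		m = bp->b_pages[i];
		if (m == bogus_page) {
			if (bogus == false) {
				bogus = true;
				/* Read lock suffices for the lookup. */
				VM_OBJECT_RLOCK(obj);
			}
			m = vm_page_lookup(obj, OFF_TO_IDX(foff));
			if (m == NULL)
				panic("biodone: page disappeared!");
		}
		/* ... per-page completion processing under sbusy ... */
	}
	if (bogus)
		VM_OBJECT_RUNLOCK(obj);
	vm_object_pip_wakeupn(obj, bp->b_npages);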

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c

@@ -162,7 +162,7 @@ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m);
 static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off,
		    vm_page_t m);
 static void vfs_clean_pages_dirty_buf(struct buf *bp);
-static void vfs_setdirty_locked_object(struct buf *bp);
+static void vfs_setdirty_range(struct buf *bp);
 static void vfs_vmio_invalidate(struct buf *bp);
 static void vfs_vmio_truncate(struct buf *bp, int npages);
 static void vfs_vmio_extend(struct buf *bp, int npages, int size);
@@ -955,8 +955,6 @@ vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off,
     vm_offset_t size, vm_page_t m)
 {
-	VM_OBJECT_ASSERT_LOCKED(m->object);
-
 	/*
	 * This function and its results are protected by higher level
	 * synchronization requiring vnode and buf locks to page in and
@@ -2865,7 +2863,6 @@ vfs_vmio_iodone(struct buf *bp)
	bogus = false;
	iosize = bp->b_bcount - bp->b_resid;
-	VM_OBJECT_WLOCK(obj);
	for (i = 0; i < bp->b_npages; i++) {
		resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
		if (resid > iosize)
			resid = iosize;
@@ -2876,7 +2873,10 @@ vfs_vmio_iodone(struct buf *bp)
		 */
		m = bp->b_pages[i];
		if (m == bogus_page) {
-			bogus = true;
+			if (bogus == false) {
+				bogus = true;
+				VM_OBJECT_RLOCK(obj);
+			}
			m = vm_page_lookup(obj, OFF_TO_IDX(foff));
			if (m == NULL)
				panic("biodone: page disappeared!");
@@ -2900,8 +2900,9 @@ vfs_vmio_iodone(struct buf *bp)
		foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
		iosize -= resid;
	}
+	if (bogus)
+		VM_OBJECT_RUNLOCK(obj);
	vm_object_pip_wakeupn(obj, bp->b_npages);
-	VM_OBJECT_WUNLOCK(obj);
	if (bogus && buf_mapped(bp)) {
		BUF_CHECK_MAPPED(bp);
		pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
@@ -3029,7 +3030,6 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int size)
	 * are not valid for the range covered by the buffer.
	 */
	obj = bp->b_bufobj->bo_object;
-	VM_OBJECT_WLOCK(obj);
	if (bp->b_npages < desiredpages) {
		/*
		 * We must allocate system pages since blocking
@@ -3041,11 +3041,13 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int size)
		 * deadlocks once allocbuf() is called after
		 * pages are vfs_busy_pages().
		 */
+		VM_OBJECT_WLOCK(obj);
		(void)vm_page_grab_pages(obj,
		    OFF_TO_IDX(bp->b_offset) + bp->b_npages,
		    VM_ALLOC_SYSTEM | VM_ALLOC_IGN_SBUSY |
		    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED,
		    &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages);
+		VM_OBJECT_WUNLOCK(obj);
		bp->b_npages = desiredpages;
	}
@@ -3076,7 +3078,6 @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int size)
			toff += tinc;
			tinc = PAGE_SIZE;
		}
-	VM_OBJECT_WUNLOCK(obj);

	/*
	 * Step 3, fixup the KVA pmap.
@@ -3656,9 +3657,8 @@ vfs_clean_pages_dirty_buf(struct buf *bp)
	KASSERT(bp->b_offset != NOOFFSET,
	    ("vfs_clean_pages_dirty_buf: no buffer offset"));
-	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
	vfs_busy_pages_acquire(bp);
-	vfs_setdirty_locked_object(bp);
+	vfs_setdirty_range(bp);
	for (i = 0; i < bp->b_npages; i++) {
		noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
		eoff = noff;
@@ -3670,69 +3670,57 @@ vfs_clean_pages_dirty_buf(struct buf *bp)
		foff = noff;
	}
	vfs_busy_pages_release(bp);
-	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
 }

 static void
-vfs_setdirty_locked_object(struct buf *bp)
+vfs_setdirty_range(struct buf *bp)
 {
-	vm_object_t object;
+	vm_offset_t boffset;
+	vm_offset_t eoffset;
	int i;

-	object = bp->b_bufobj->bo_object;
-	VM_OBJECT_ASSERT_WLOCKED(object);
-
	/*
-	 * We qualify the scan for modified pages on whether the
-	 * object has been flushed yet.
+	 * test the pages to see if they have been modified directly
+	 * by users through the VM system.
	 */
-	if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) {
-		vm_offset_t boffset;
-		vm_offset_t eoffset;
-
-		/*
-		 * test the pages to see if they have been modified directly
-		 * by users through the VM system.
-		 */
-		for (i = 0; i < bp->b_npages; i++)
-			vm_page_test_dirty(bp->b_pages[i]);
-
-		/*
-		 * Calculate the encompassing dirty range, boffset and eoffset,
-		 * (eoffset - boffset) bytes.
-		 */
-		for (i = 0; i < bp->b_npages; i++) {
-			if (bp->b_pages[i]->dirty)
-				break;
-		}
-		boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
-
-		for (i = bp->b_npages - 1; i >= 0; --i) {
-			if (bp->b_pages[i]->dirty) {
-				break;
-			}
-		}
-		eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
-
-		/*
-		 * Fit it to the buffer.
-		 */
-		if (eoffset > bp->b_bcount)
-			eoffset = bp->b_bcount;
-
-		/*
-		 * If we have a good dirty range, merge with the existing
-		 * dirty range.
-		 */
-		if (boffset < eoffset) {
-			if (bp->b_dirtyoff > boffset)
-				bp->b_dirtyoff = boffset;
-			if (bp->b_dirtyend < eoffset)
-				bp->b_dirtyend = eoffset;
-		}
+	for (i = 0; i < bp->b_npages; i++)
+		vm_page_test_dirty(bp->b_pages[i]);
+
+	/*
+	 * Calculate the encompassing dirty range, boffset and eoffset,
+	 * (eoffset - boffset) bytes.
+	 */
+	for (i = 0; i < bp->b_npages; i++) {
+		if (bp->b_pages[i]->dirty)
+			break;
+	}
+	boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+
+	for (i = bp->b_npages - 1; i >= 0; --i) {
+		if (bp->b_pages[i]->dirty) {
+			break;
+		}
+	}
+	eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+
+	/*
+	 * Fit it to the buffer.
+	 */
+	if (eoffset > bp->b_bcount)
+		eoffset = bp->b_bcount;
+
+	/*
+	 * If we have a good dirty range, merge with the existing
+	 * dirty range.
+	 */
+	if (boffset < eoffset) {
+		if (bp->b_dirtyoff > boffset)
+			bp->b_dirtyoff = boffset;
+		if (bp->b_dirtyend < eoffset)
+			bp->b_dirtyend = eoffset;
	}
 }
@@ -4472,16 +4460,21 @@ vfs_unbusy_pages(struct buf *bp)
	int i;
	vm_object_t obj;
	vm_page_t m;
+	bool bogus;

	runningbufwakeup(bp);
	if (!(bp->b_flags & B_VMIO))
		return;

	obj = bp->b_bufobj->bo_object;
-	VM_OBJECT_WLOCK(obj);
+	bogus = false;
	for (i = 0; i < bp->b_npages; i++) {
		m = bp->b_pages[i];
		if (m == bogus_page) {
+			if (bogus == false) {
+				bogus = true;
+				VM_OBJECT_RLOCK(obj);
+			}
			m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
			if (!m)
				panic("vfs_unbusy_pages: page missing\n");
@@ -4495,8 +4488,9 @@ vfs_unbusy_pages(struct buf *bp)
		}
		vm_page_sunbusy(m);
	}
+	if (bogus)
+		VM_OBJECT_RUNLOCK(obj);
	vm_object_pip_wakeupn(obj, bp->b_npages);
-	VM_OBJECT_WUNLOCK(obj);
 }

 /*
@@ -4573,7 +4567,6 @@ vfs_busy_pages_acquire(struct buf *bp)
 {
	int i;

-	VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
	for (i = 0; i < bp->b_npages; i++)
		vm_page_busy_acquire(bp->b_pages[i], VM_ALLOC_SBUSY);
 }
@@ -4583,7 +4576,6 @@ vfs_busy_pages_release(struct buf *bp)
 {
	int i;

-	VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
	for (i = 0; i < bp->b_npages; i++)
		vm_page_sunbusy(bp->b_pages[i]);
 }
@@ -4616,13 +4608,12 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
	foff = bp->b_offset;
	KASSERT(bp->b_offset != NOOFFSET,
	    ("vfs_busy_pages: no buffer offset"));
-	VM_OBJECT_WLOCK(obj);
	if ((bp->b_flags & B_CLUSTER) == 0) {
		vm_object_pip_add(obj, bp->b_npages);
		vfs_busy_pages_acquire(bp);
	}
	if (bp->b_bufsize != 0)
-		vfs_setdirty_locked_object(bp);
+		vfs_setdirty_range(bp);
	bogus = false;
	for (i = 0; i < bp->b_npages; i++) {
		m = bp->b_pages[i];
@@ -4653,7 +4644,6 @@ vfs_busy_pages(struct buf *bp, int clear_modify)
		}
		foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
	}
-	VM_OBJECT_WUNLOCK(obj);
	if (bogus && buf_mapped(bp)) {
		BUF_CHECK_MAPPED(bp);
		pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
@@ -4686,8 +4676,6 @@ vfs_bio_set_valid(struct buf *bp, int base, int size)
	base += (bp->b_offset & PAGE_MASK);
	n = PAGE_SIZE - (base & PAGE_MASK);

-	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
-
	/*
	 * Busy may not be strictly necessary here because the pages are
	 * unlikely to be fully valid and the vnode lock will synchronize
@@ -4705,7 +4693,6 @@ vfs_bio_set_valid(struct buf *bp, int base, int size)
			n = PAGE_SIZE;
	}
	vfs_busy_pages_release(bp);
-	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
 }

 /*
@@ -4731,22 +4718,7 @@ vfs_bio_clrbuf(struct buf *bp)
	}
	bp->b_flags &= ~B_INVAL;
	bp->b_ioflags &= ~BIO_ERROR;
-	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
	vfs_busy_pages_acquire(bp);
-	if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
-	    (bp->b_offset & PAGE_MASK) == 0) {
-		if (bp->b_pages[0] == bogus_page)
-			goto unlock;
-		mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1;
-		VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[0]->object);
-		if ((bp->b_pages[0]->valid & mask) == mask)
-			goto unlock;
-		if ((bp->b_pages[0]->valid & mask) == 0) {
-			pmap_zero_page_area(bp->b_pages[0], 0, bp->b_bufsize);
-			bp->b_pages[0]->valid |= mask;
-			goto unlock;
-		}
-	}
	sa = bp->b_offset & PAGE_MASK;
	slide = 0;
	for (i = 0; i < bp->b_npages; i++, sa = 0) {
@@ -4758,7 +4730,6 @@ vfs_bio_clrbuf(struct buf *bp)
			continue;
		j = sa / DEV_BSIZE;
		mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;
-		VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[i]->object);
		if ((bp->b_pages[i]->valid & mask) == mask)
			continue;
		if ((bp->b_pages[i]->valid & mask) == 0)
@@ -4771,11 +4742,10 @@ vfs_bio_clrbuf(struct buf *bp)
				}
			}
		}
-		bp->b_pages[i]->valid |= mask;
+		vm_page_set_valid_range(bp->b_pages[i], j * DEV_BSIZE,
+		    roundup2(ea - sa, DEV_BSIZE));
	}
-unlock:
	vfs_busy_pages_release(bp);
-	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
	bp->b_resid = 0;
 }
@@ -5186,11 +5156,9 @@ vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count,
	br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
	    != 0) ? GB_UNMAPPED : 0;
-	VM_OBJECT_WLOCK(object);
 again:
	for (i = 0; i < count; i++)
		vm_page_busy_downgrade(ma[i]);
-	VM_OBJECT_WUNLOCK(object);

	lbnp = -1;
	for (i = 0; i < count; i++) {
@@ -5249,11 +5217,9 @@ vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count,
		    vm_page_all_valid(m) || i == count - 1,
		    ("buf %d %p invalid", i, m));
		if (i == count - 1 && lpart) {
-			VM_OBJECT_WLOCK(object);
			if (!vm_page_none_valid(m) &&
			    !vm_page_all_valid(m))
				vm_page_zero_invalid(m, TRUE);
-			VM_OBJECT_WUNLOCK(object);
		}
 next_page:;
	}
@@ -5281,9 +5247,9 @@ next_page:;
		if (!vm_page_all_valid(ma[i]))
			redo = true;
	}
+	VM_OBJECT_WUNLOCK(object);
	if (redo && error == 0)
		goto again;
-	VM_OBJECT_WUNLOCK(object);
	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 }
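Note how vfs_busy_pages_acquire()/vfs_busy_pages_release() lost their
VM_OBJECT_ASSERT_WLOCKED() checks above: shared-busying the pages is now
the whole synchronization story for callers such as vfs_bio_clrbuf() and
vfs_clean_pages_dirty_buf(). A sketch of the resulting caller bracket
(example_scan_buf_pages is a hypothetical name, not a function from this
commit):

	/*
	 * Hypothetical caller: page busy, not the object lock,
	 * serializes access to the buffer's pages.
	 */
	static void
	example_scan_buf_pages(struct buf *bp)
	{
		int i;

		vfs_busy_pages_acquire(bp);	/* sbusy each page, lock-free */
		for (i = 0; i < bp->b_npages; i++) {
			/* valid/dirty bits may be inspected and updated here */
		}
		vfs_busy_pages_release(bp);
	}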

diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c

@@ -417,11 +417,9 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
	inc = btodb(size);
	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
		if (i == 0) {
-			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			vm_object_pip_add(tbp->b_bufobj->bo_object,
			    tbp->b_npages);
			vfs_busy_pages_acquire(tbp);
-			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		} else {
			if ((bp->b_npages * PAGE_SIZE) +
			    round_page(size) > vp->v_mount->mnt_iosize_max) {
@@ -458,13 +456,11 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
			 */
			off = tbp->b_offset;
			tsize = size;
-			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			for (j = 0; tsize > 0; j++) {
				toff = off & PAGE_MASK;
				tinc = tsize;
				if (toff + tinc > PAGE_SIZE)
					tinc = PAGE_SIZE - toff;
-				VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object);
				if (vm_page_trysbusy(tbp->b_pages[j]) == 0)
					break;
				if ((tbp->b_pages[j]->valid &
@@ -482,11 +478,9 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
				    j);
				for (k = 0; k < j; k++)
					vm_page_sunbusy(tbp->b_pages[k]);
-				VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
				bqrelse(tbp);
				break;
			}
-			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);

			/*
			 * Set a read-ahead mark as appropriate
@@ -506,7 +500,6 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
			if (tbp->b_blkno == tbp->b_lblkno) {
				tbp->b_blkno = bn;
			} else if (tbp->b_blkno != bn) {
-				VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
				goto clean_sbusy;
			}
		}
@@ -517,9 +510,9 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
		BUF_KERNPROC(tbp);
		TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
			tbp, b_cluster.cluster_entry);
-		VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
		for (j = 0; j < tbp->b_npages; j += 1) {
			vm_page_t m;
+
			m = tbp->b_pages[j];
			if ((bp->b_npages == 0) ||
			    (bp->b_pages[bp->b_npages-1] != m)) {
@@ -529,7 +522,7 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
			if (vm_page_all_valid(m))
				tbp->b_pages[j] = bogus_page;
		}
-		VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
+
		/*
		 * Don't inherit tbp->b_bufsize as it may be larger due to
		 * a non-page-aligned size. Instead just aggregate using
@@ -547,13 +540,10 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
	 * Fully valid pages in the cluster are already good and do not need
	 * to be re-read from disk. Replace the page with bogus_page
	 */
-	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
	for (j = 0; j < bp->b_npages; j++) {
-		VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object);
		if (vm_page_all_valid(bp->b_pages[j]))
			bp->b_pages[j] = bogus_page;
	}
-	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);

	if (bp->b_bufsize > bp->b_kvasize)
		panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
		    bp->b_bufsize, bp->b_kvasize);
@@ -988,7 +978,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
		if (tbp->b_flags & B_VMIO) {
			vm_page_t m;

-			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			if (i == 0) {
				vfs_busy_pages_acquire(tbp);
			} else { /* if not first buffer */
@@ -998,8 +987,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
					for (j--; j >= 0; j--)
						vm_page_sunbusy(
						    tbp->b_pages[j]);
-					VM_OBJECT_WUNLOCK(
-					    tbp->b_object);
					bqrelse(tbp);
					goto finishcluster;
				}
@@ -1015,7 +1002,6 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
					bp->b_npages++;
				}
			}
-			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		}
		bp->b_bcount += size;
		bp->b_bufsize += size;
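
The cluster paths keep their rollback structure, just without the object
lock: each page is taken with vm_page_trysbusy(), and on failure the pages
already busied are released before the buffer is dropped. Condensed sketch
of that error path (j counts pages successfully busied, as in the
cluster_rbuild() hunks above; not the literal committed code):

	for (j = 0; tsize > 0; j++) {
		if (vm_page_trysbusy(tbp->b_pages[j]) == 0)
			break;		/* page busy elsewhere; give up */
		/* ... reject the buffer if the page is already valid ... */
	}
	if (tsize > 0) {
		/* Back out: sunbusy what we took, then drop the buffer. */
		for (k = 0; k < j; k++)
			vm_page_sunbusy(tbp->b_pages[k]);
		bqrelse(tbp);
	}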