From c7d346f269be866750c3fe277293134978544223 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Sun, 3 Feb 2013 18:42:20 +0000 Subject: [PATCH] zfs: fix, improve and re-organize page_lookup and page_unlock Now they are split into two pairs: page_hold/page_unhold for mappedread and page_busy/page_unbusy for update_pages. For mappedread we simply hold a page that is to be used as a source if it is resident and valid (and not busy). This is sufficient since we are only doing page -> user buffer copying. There is no page <-> backing storage I/O involved. update_pages is now better split to properly handle the putpages case (page -> arc) and the regular write case (arc -> page). For the latter we use complete protocol of marking an object with paging-in-progress and marking a page with io_start (busy count). Also, in this case we remove the write bit from all page mappings and clear dirty bits of the pages, the former is needed to ensure that the latter does the right thing. Additionally we update a page if it is cached instead of just freeing it as was done before. This needs to be verified. A minor detail: ZFS-backed pages should always be either fully valid or fully invalid. Assert this and use simpler API that does not deal with sub-page blocks. Reviewed by: kib MFC after: 26 days --- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 116 ++++++++++++++---- 1 file changed, 92 insertions(+), 24 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 330c7538fbd8..9438266ca4f8 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -323,7 +323,7 @@ zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred, } static vm_page_t -page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) +page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) { vm_object_t obj; vm_page_t pp; @@ -333,7 +333,7 @@ page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) for (;;) { if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && - vm_page_is_valid(pp, (vm_offset_t)off, nbytes)) { + pp->valid) { if ((pp->oflags & VPO_BUSY) != 0) { /* * Reference the page before unlocking and @@ -344,13 +344,18 @@ page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) vm_page_sleep(pp, "zfsmwb"); continue; } - vm_page_busy(pp); - vm_page_undirty(pp); } else { - if (vm_page_is_cached(obj, OFF_TO_IDX(start))) - vm_page_cache_free(obj, OFF_TO_IDX(start), - OFF_TO_IDX(start) + 1); - pp = NULL; + pp = vm_page_alloc(obj, OFF_TO_IDX(start), + VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED | + VM_ALLOC_NOBUSY); + } + + if (pp != NULL) { + ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); + vm_object_pip_add(obj, 1); + vm_page_io_start(pp); + pmap_remove_write(pp); + vm_page_clear_dirty(pp, off, nbytes); } break; } @@ -358,10 +363,55 @@ page_lookup(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes) } static void -page_unlock(vm_page_t pp) +page_unbusy(vm_page_t pp) { - vm_page_wakeup(pp); + vm_page_io_finish(pp); + vm_object_pip_subtract(pp->object, 1); +} + +static vm_page_t +page_hold(vnode_t *vp, int64_t start) +{ + vm_object_t obj; + vm_page_t pp; + + obj = vp->v_object; + VM_OBJECT_LOCK_ASSERT(obj, MA_OWNED); + + for (;;) { + if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && + pp->valid) { + if ((pp->oflags & VPO_BUSY) != 0) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_reference(pp); + vm_page_sleep(pp, "zfsmwb"); + continue; + } + + ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); + vm_page_lock(pp); + vm_page_hold(pp); + vm_page_unlock(pp); + + } else + pp = NULL; + break; + } + return (pp); +} + +static void +page_unhold(vm_page_t pp) +{ + + vm_page_lock(pp); + vm_page_unhold(pp); + vm_page_unlock(pp); } static caddr_t @@ -392,6 +442,7 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, { vm_object_t obj; struct sf_buf *sf; + caddr_t va; int off; ASSERT(vp->v_mount != NULL); @@ -402,27 +453,44 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, VM_OBJECT_LOCK(obj); for (start &= PAGEMASK; len > 0; start += PAGESIZE) { vm_page_t pp; - int nbytes = MIN(PAGESIZE - off, len); - - if ((pp = page_lookup(vp, start, off, nbytes)) != NULL) { - caddr_t va; + int nbytes = imin(PAGESIZE - off, len); + if (segflg == UIO_NOCOPY) { + pp = vm_page_lookup(obj, OFF_TO_IDX(start)); + KASSERT(pp != NULL, + ("zfs update_pages: NULL page in putpages case")); + KASSERT(off == 0, + ("zfs update_pages: unaligned data in putpages case")); + KASSERT(pp->valid == VM_PAGE_BITS_ALL, + ("zfs update_pages: invalid page in putpages case")); + KASSERT(pp->busy > 0, + ("zfs update_pages: unbusy page in putpages case")); + KASSERT(!pmap_page_is_write_mapped(pp), + ("zfs update_pages: writable page in putpages case")); VM_OBJECT_UNLOCK(obj); + va = zfs_map_page(pp, &sf); - if (segflg == UIO_NOCOPY) { - (void) dmu_write(os, oid, start+off, nbytes, - va+off, tx); - } else { - (void) dmu_read(os, oid, start+off, nbytes, - va+off, DMU_READ_PREFETCH); - } + (void) dmu_write(os, oid, start, nbytes, va, tx); zfs_unmap_page(sf); + VM_OBJECT_LOCK(obj); - page_unlock(pp); + vm_page_undirty(pp); + } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) { + VM_OBJECT_UNLOCK(obj); + + va = zfs_map_page(pp, &sf); + (void) dmu_read(os, oid, start+off, nbytes, + va+off, DMU_READ_PREFETCH);; + zfs_unmap_page(sf); + + VM_OBJECT_LOCK(obj); + page_unbusy(pp); } len -= nbytes; off = 0; } + if (segflg != UIO_NOCOPY) + vm_object_pip_wakeupn(obj, 0); VM_OBJECT_UNLOCK(obj); } @@ -524,7 +592,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) vm_page_t pp; uint64_t bytes = MIN(PAGESIZE - off, len); - if (pp = page_lookup(vp, start, off, bytes)) { + if (pp = page_hold(vp, start)) { struct sf_buf *sf; caddr_t va; @@ -533,7 +601,7 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) error = uiomove(va + off, bytes, UIO_READ, uio); zfs_unmap_page(sf); VM_OBJECT_LOCK(obj); - page_unlock(pp); + page_unhold(pp); } else { VM_OBJECT_UNLOCK(obj); error = dmu_read_uio(os, zp->z_id, uio, bytes);