zfs: enable vn_io_fault support

Note that we now have to account for possible partial writes
in dmu_write_uio_dbuf().  It seems that on illumos either all or none
of the data is expected to be written.  But partial writes are to be
expected when vn_io_fault support is enabled.

Reviewed by:	kib
MFC after:	7 weeks
Differential Revision: https://reviews.freebsd.org/D2790
This commit is contained in:
Andriy Gapon 2016-04-16 07:35:53 +00:00
parent 50f2c01d32
commit c2d36fc5cd
3 changed files with 49 additions and 1 deletion

View File

@ -1092,8 +1092,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
else
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
} else {
#ifdef illumos
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_READ, uio);
#else
err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
tocpy, uio);
#endif
}
if (err)
break;
@ -1187,6 +1192,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
else
dmu_buf_will_dirty(db, tx);
#ifdef illumos
/*
* XXX uiomove could block forever (eg. nfs-backed
* pages). There needs to be a uiolockdown() function
@ -1195,6 +1201,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
*/
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_WRITE, uio);
#else
err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy,
uio);
#endif
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);

View File

@ -1170,6 +1170,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */
/*
* The fsid is 64 bits, composed of an 8-bit fs type, which

View File

@ -656,7 +656,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
zfs_vmobject_wunlock(obj);
va = zfs_map_page(pp, &sf);
#ifdef illumos
error = uiomove(va + off, bytes, UIO_READ, uio);
#else
error = vn_io_fault_uiomove(va + off, bytes, uio);
#endif
zfs_unmap_page(sf);
zfs_vmobject_wlock(obj);
page_unhold(pp);
@ -1034,18 +1038,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
* holding up the transaction if the data copy hangs
* up on a pagefault (e.g., from an NFS server mapping).
*/
#ifdef illumos
size_t cbytes;
#endif
abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
max_blksz);
ASSERT(abuf != NULL);
ASSERT(arc_buf_size(abuf) == max_blksz);
#ifdef illumos
if (error = uiocopy(abuf->b_data, max_blksz,
UIO_WRITE, uio, &cbytes)) {
dmu_return_arcbuf(abuf);
break;
}
ASSERT(cbytes == max_blksz);
#else
ssize_t resid = uio->uio_resid;
error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
if (error != 0) {
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
dmu_return_arcbuf(abuf);
break;
}
#endif
}
/*
@ -1123,8 +1140,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
woff, abuf, tx);
}
#ifdef illumos
ASSERT(tx_bytes <= uio->uio_resid);
uioskip(uio, tx_bytes);
#endif
}
if (tx_bytes && vn_has_cached_data(vp)) {
update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
@ -1178,7 +1197,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
while ((end_size = zp->z_size) < uio->uio_loffset) {
(void) atomic_cas_64(&zp->z_size, end_size,
uio->uio_loffset);
#ifdef illumos
ASSERT(error == 0);
#else
ASSERT(error == 0 || error == EFAULT);
#endif
}
/*
* If we are replaying and eof is non zero then force
@ -1188,7 +1211,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
zp->z_size = zfsvfs->z_replay_eof;
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
if (error == 0)
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
else
(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
dmu_tx_commit(tx);
@ -1215,6 +1241,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
return (error);
}
#ifdef __FreeBSD__
/*
* EFAULT means that at least one page of the source buffer was not
* available. VFS will re-try remaining I/O upon this error.
*/
if (error == EFAULT) {
ZFS_EXIT(zfsvfs);
return (error);
}
#endif
if (ioflag & (FSYNC | FDSYNC) ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, zp->z_id);