zfs: enable vn_io_fault support
Note that now we have to account for possible partial writes in dmu_write_uio_dbuf(). It seems that on illumos either all or none of the data are expected to be written, but partial writes are quite expected when vn_io_fault support is enabled.

Reviewed by:	kib
MFC after:	7 weeks
Differential Revision:	https://reviews.freebsd.org/D2790
This commit is contained in:
parent
50f2c01d32
commit
c2d36fc5cd
@ -1092,8 +1092,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
|
||||
else
|
||||
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
|
||||
} else {
|
||||
#ifdef illumos
|
||||
err = uiomove((char *)db->db_data + bufoff, tocpy,
|
||||
UIO_READ, uio);
|
||||
#else
|
||||
err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
|
||||
tocpy, uio);
|
||||
#endif
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
@ -1187,6 +1192,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
||||
else
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
#ifdef illumos
|
||||
/*
|
||||
* XXX uiomove could block forever (eg. nfs-backed
|
||||
* pages). There needs to be a uiolockdown() function
|
||||
@ -1195,6 +1201,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
||||
*/
|
||||
err = uiomove((char *)db->db_data + bufoff, tocpy,
|
||||
UIO_WRITE, uio);
|
||||
#else
|
||||
err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy,
|
||||
uio);
|
||||
#endif
|
||||
|
||||
if (tocpy == db->db_size)
|
||||
dmu_buf_fill_done(db, tx);
|
||||
|
@ -1170,6 +1170,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
||||
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
|
||||
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
|
||||
vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
|
||||
vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */
|
||||
|
||||
/*
|
||||
* The fsid is 64 bits, composed of an 8-bit fs type, which
|
||||
|
@ -656,7 +656,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
|
||||
|
||||
zfs_vmobject_wunlock(obj);
|
||||
va = zfs_map_page(pp, &sf);
|
||||
#ifdef illumos
|
||||
error = uiomove(va + off, bytes, UIO_READ, uio);
|
||||
#else
|
||||
error = vn_io_fault_uiomove(va + off, bytes, uio);
|
||||
#endif
|
||||
zfs_unmap_page(sf);
|
||||
zfs_vmobject_wlock(obj);
|
||||
page_unhold(pp);
|
||||
@ -1034,18 +1038,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
||||
* holding up the transaction if the data copy hangs
|
||||
* up on a pagefault (e.g., from an NFS server mapping).
|
||||
*/
|
||||
#ifdef illumos
|
||||
size_t cbytes;
|
||||
#endif
|
||||
|
||||
abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
|
||||
max_blksz);
|
||||
ASSERT(abuf != NULL);
|
||||
ASSERT(arc_buf_size(abuf) == max_blksz);
|
||||
#ifdef illumos
|
||||
if (error = uiocopy(abuf->b_data, max_blksz,
|
||||
UIO_WRITE, uio, &cbytes)) {
|
||||
dmu_return_arcbuf(abuf);
|
||||
break;
|
||||
}
|
||||
ASSERT(cbytes == max_blksz);
|
||||
#else
|
||||
ssize_t resid = uio->uio_resid;
|
||||
error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
|
||||
if (error != 0) {
|
||||
uio->uio_offset -= resid - uio->uio_resid;
|
||||
uio->uio_resid = resid;
|
||||
dmu_return_arcbuf(abuf);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1123,8 +1140,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
||||
dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
|
||||
woff, abuf, tx);
|
||||
}
|
||||
#ifdef illumos
|
||||
ASSERT(tx_bytes <= uio->uio_resid);
|
||||
uioskip(uio, tx_bytes);
|
||||
#endif
|
||||
}
|
||||
if (tx_bytes && vn_has_cached_data(vp)) {
|
||||
update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
|
||||
@ -1178,7 +1197,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
||||
while ((end_size = zp->z_size) < uio->uio_loffset) {
|
||||
(void) atomic_cas_64(&zp->z_size, end_size,
|
||||
uio->uio_loffset);
|
||||
#ifdef illumos
|
||||
ASSERT(error == 0);
|
||||
#else
|
||||
ASSERT(error == 0 || error == EFAULT);
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
* If we are replaying and eof is non zero then force
|
||||
@ -1188,7 +1211,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
||||
if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
|
||||
zp->z_size = zfsvfs->z_replay_eof;
|
||||
|
||||
#ifdef illumos
	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
#else
	/*
	 * On FreeBSD, preserve a partial-write error (e.g. EFAULT from
	 * vn_io_fault_uiomove) instead of overwriting it with the
	 * sa_bulk_update() result.
	 */
	if (error == 0)
		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
	else
		(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
#endif
|
||||
|
||||
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
|
||||
dmu_tx_commit(tx);
|
||||
@ -1215,6 +1241,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
||||
return (error);
|
||||
}
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
/*
|
||||
* EFAULT means that at least one page of the source buffer was not
|
||||
* available. VFS will re-try remaining I/O upon this error.
|
||||
*/
|
||||
if (error == EFAULT) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ioflag & (FSYNC | FDSYNC) ||
|
||||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||
zil_commit(zilog, zp->z_id);
|
||||
|
Loading…
x
Reference in New Issue
Block a user