zfs: enable vn_io_fault support
Note that we now have to account for possible partial writes in dmu_write_uio_dbuf(). On illumos it seems that either all or none of the data is expected to be written, but partial writes are quite expected when vn_io_fault support is enabled.
Reviewed by: kib
MFC after: 7 weeks
Differential Revision: https://reviews.freebsd.org/D2790
This commit is contained in:
parent
50f2c01d32
commit
c2d36fc5cd
@ -1092,8 +1092,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
|
|||||||
else
|
else
|
||||||
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
|
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef illumos
|
||||||
err = uiomove((char *)db->db_data + bufoff, tocpy,
|
err = uiomove((char *)db->db_data + bufoff, tocpy,
|
||||||
UIO_READ, uio);
|
UIO_READ, uio);
|
||||||
|
#else
|
||||||
|
err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
|
||||||
|
tocpy, uio);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
if (err)
|
if (err)
|
||||||
break;
|
break;
|
||||||
@ -1187,6 +1192,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
|||||||
else
|
else
|
||||||
dmu_buf_will_dirty(db, tx);
|
dmu_buf_will_dirty(db, tx);
|
||||||
|
|
||||||
|
#ifdef illumos
|
||||||
/*
|
/*
|
||||||
* XXX uiomove could block forever (eg. nfs-backed
|
* XXX uiomove could block forever (eg. nfs-backed
|
||||||
* pages). There needs to be a uiolockdown() function
|
* pages). There needs to be a uiolockdown() function
|
||||||
@ -1195,6 +1201,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
|
|||||||
*/
|
*/
|
||||||
err = uiomove((char *)db->db_data + bufoff, tocpy,
|
err = uiomove((char *)db->db_data + bufoff, tocpy,
|
||||||
UIO_WRITE, uio);
|
UIO_WRITE, uio);
|
||||||
|
#else
|
||||||
|
err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy,
|
||||||
|
uio);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tocpy == db->db_size)
|
if (tocpy == db->db_size)
|
||||||
dmu_buf_fill_done(db, tx);
|
dmu_buf_fill_done(db, tx);
|
||||||
|
@ -1170,6 +1170,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
|||||||
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
|
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
|
||||||
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
|
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
|
||||||
vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
|
vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
|
||||||
|
vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The fsid is 64 bits, composed of an 8-bit fs type, which
|
* The fsid is 64 bits, composed of an 8-bit fs type, which
|
||||||
|
@ -656,7 +656,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
|
|||||||
|
|
||||||
zfs_vmobject_wunlock(obj);
|
zfs_vmobject_wunlock(obj);
|
||||||
va = zfs_map_page(pp, &sf);
|
va = zfs_map_page(pp, &sf);
|
||||||
|
#ifdef illumos
|
||||||
error = uiomove(va + off, bytes, UIO_READ, uio);
|
error = uiomove(va + off, bytes, UIO_READ, uio);
|
||||||
|
#else
|
||||||
|
error = vn_io_fault_uiomove(va + off, bytes, uio);
|
||||||
|
#endif
|
||||||
zfs_unmap_page(sf);
|
zfs_unmap_page(sf);
|
||||||
zfs_vmobject_wlock(obj);
|
zfs_vmobject_wlock(obj);
|
||||||
page_unhold(pp);
|
page_unhold(pp);
|
||||||
@ -1034,18 +1038,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||||||
* holding up the transaction if the data copy hangs
|
* holding up the transaction if the data copy hangs
|
||||||
* up on a pagefault (e.g., from an NFS server mapping).
|
* up on a pagefault (e.g., from an NFS server mapping).
|
||||||
*/
|
*/
|
||||||
|
#ifdef illumos
|
||||||
size_t cbytes;
|
size_t cbytes;
|
||||||
|
#endif
|
||||||
|
|
||||||
abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
|
abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
|
||||||
max_blksz);
|
max_blksz);
|
||||||
ASSERT(abuf != NULL);
|
ASSERT(abuf != NULL);
|
||||||
ASSERT(arc_buf_size(abuf) == max_blksz);
|
ASSERT(arc_buf_size(abuf) == max_blksz);
|
||||||
|
#ifdef illumos
|
||||||
if (error = uiocopy(abuf->b_data, max_blksz,
|
if (error = uiocopy(abuf->b_data, max_blksz,
|
||||||
UIO_WRITE, uio, &cbytes)) {
|
UIO_WRITE, uio, &cbytes)) {
|
||||||
dmu_return_arcbuf(abuf);
|
dmu_return_arcbuf(abuf);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ASSERT(cbytes == max_blksz);
|
ASSERT(cbytes == max_blksz);
|
||||||
|
#else
|
||||||
|
ssize_t resid = uio->uio_resid;
|
||||||
|
error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
|
||||||
|
if (error != 0) {
|
||||||
|
uio->uio_offset -= resid - uio->uio_resid;
|
||||||
|
uio->uio_resid = resid;
|
||||||
|
dmu_return_arcbuf(abuf);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1123,8 +1140,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||||||
dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
|
dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
|
||||||
woff, abuf, tx);
|
woff, abuf, tx);
|
||||||
}
|
}
|
||||||
|
#ifdef illumos
|
||||||
ASSERT(tx_bytes <= uio->uio_resid);
|
ASSERT(tx_bytes <= uio->uio_resid);
|
||||||
uioskip(uio, tx_bytes);
|
uioskip(uio, tx_bytes);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
if (tx_bytes && vn_has_cached_data(vp)) {
|
if (tx_bytes && vn_has_cached_data(vp)) {
|
||||||
update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
|
update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
|
||||||
@ -1178,7 +1197,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||||||
while ((end_size = zp->z_size) < uio->uio_loffset) {
|
while ((end_size = zp->z_size) < uio->uio_loffset) {
|
||||||
(void) atomic_cas_64(&zp->z_size, end_size,
|
(void) atomic_cas_64(&zp->z_size, end_size,
|
||||||
uio->uio_loffset);
|
uio->uio_loffset);
|
||||||
|
#ifdef illumos
|
||||||
ASSERT(error == 0);
|
ASSERT(error == 0);
|
||||||
|
#else
|
||||||
|
ASSERT(error == 0 || error == EFAULT);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* If we are replaying and eof is non zero then force
|
* If we are replaying and eof is non zero then force
|
||||||
@ -1188,7 +1211,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||||||
if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
|
if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
|
||||||
zp->z_size = zfsvfs->z_replay_eof;
|
zp->z_size = zfsvfs->z_replay_eof;
|
||||||
|
|
||||||
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
|
if (error == 0)
|
||||||
|
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
|
||||||
|
else
|
||||||
|
(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
|
||||||
|
|
||||||
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
|
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
@ -1215,6 +1241,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __FreeBSD__
|
||||||
|
/*
|
||||||
|
* EFAULT means that at least one page of the source buffer was not
|
||||||
|
* available. VFS will re-try remaining I/O upon this error.
|
||||||
|
*/
|
||||||
|
if (error == EFAULT) {
|
||||||
|
ZFS_EXIT(zfsvfs);
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (ioflag & (FSYNC | FDSYNC) ||
|
if (ioflag & (FSYNC | FDSYNC) ||
|
||||||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
|
||||||
zil_commit(zilog, zp->z_id);
|
zil_commit(zilog, zp->z_id);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user