From ce78d06b49e5462b3afb4cb522b019a8c95f39ae Mon Sep 17 00:00:00 2001 From: Xin LI Date: Mon, 6 Oct 2014 06:04:10 +0000 Subject: [PATCH] 5162 zfs recv should use loaned arc buffer to avoid copy Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Bayard Bell Reviewed by: Richard Elling Approved by: Garrett D'Amore Author: Matthew Ahrens illumos/illumos-gate@8a9047098ad8ce5afa38b6d012c8b509bb619f40 --- uts/common/fs/zfs/dmu.c | 9 ++++++- uts/common/fs/zfs/dmu_send.c | 46 +++++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/uts/common/fs/zfs/dmu.c b/uts/common/fs/zfs/dmu.c index 619b3b4aa458..4fdcb1475018 100644 --- a/uts/common/fs/zfs/dmu.c +++ b/uts/common/fs/zfs/dmu.c @@ -1264,7 +1264,14 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(dbuf); - if (offset == db->db.db_offset && blksz == db->db.db_size) { + /* + * We can only assign if the offset is aligned, the arc buf is the + * same size as the dbuf, and the dbuf is not metadata. It + * can't be metadata because the loaned arc buf comes from the + * user-data kmem arena. + */ + if (offset == db->db.db_offset && blksz == db->db.db_size && + DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA) { dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { diff --git a/uts/common/fs/zfs/dmu_send.c b/uts/common/fs/zfs/dmu_send.c index 97c9926bd79b..9b03eff7a06b 100644 --- a/uts/common/fs/zfs/dmu_send.c +++ b/uts/common/fs/zfs/dmu_send.c @@ -1212,11 +1212,13 @@ free_guid_map_onexit(void *arg) } static void * -restore_read(struct restorearg *ra, int len) +restore_read(struct restorearg *ra, int len, char *buf) { - void *rv; int done = 0; + if (buf == NULL) + buf = ra->buf; + /* some things will require 8-byte alignment, so everything must */ ASSERT0(len % 8); @@ -1224,7 +1226,7 @@ restore_read(struct restorearg *ra, int len) ssize_t resid; ra->err = vn_rdwr(UIO_READ, ra->vp, - (caddr_t)ra->buf + done, len - done, + buf + done, len - done, ra->voff, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); @@ -1237,12 +1239,11 @@ restore_read(struct restorearg *ra, int len) } ASSERT3U(done, ==, len); - rv = ra->buf; if (ra->byteswap) - fletcher_4_incremental_byteswap(rv, len, &ra->cksum); + fletcher_4_incremental_byteswap(buf, len, &ra->cksum); else - fletcher_4_incremental_native(rv, len, &ra->cksum); - return (rv); + fletcher_4_incremental_native(buf, len, &ra->cksum); + return (buf); } static void @@ -1357,7 +1358,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) return (SET_ERROR(EINVAL)); if (drro->drr_bonuslen) { - data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8)); + data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL); if (ra->err != 0) return (ra->err); } @@ -1454,19 +1455,30 @@ restore_write(struct restorearg *ra, objset_t *os, !DMU_OT_IS_VALID(drrw->drr_type)) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrw->drr_length); - if (data == NULL) - return (ra->err); - if (dmu_object_info(os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); + dmu_buf_t *bonus; + if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0) + return (SET_ERROR(EINVAL)); + + arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length); + + data = restore_read(ra, drrw->drr_length, abuf->b_data); + if (data == NULL) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus, FTAG); + return (ra->err); + } + tx = dmu_tx_create(os); dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_length); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { + dmu_return_arcbuf(abuf); + dmu_buf_rele(bonus, FTAG); dmu_tx_abort(tx); return (err); } @@ -1475,9 +1487,9 @@ restore_write(struct restorearg *ra, objset_t *os, DMU_OT_BYTESWAP(drrw->drr_type); dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length); } - dmu_write(os, drrw->drr_object, - drrw->drr_offset, drrw->drr_length, data, tx); + dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); dmu_tx_commit(tx); + dmu_buf_rele(bonus, FTAG); return (0); } @@ -1559,7 +1571,7 @@ restore_write_embedded(struct restorearg *ra, objset_t *os, if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); - data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8)); + data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL); if (data == NULL) return (ra->err); @@ -1594,7 +1606,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs) drrs->drr_length > SPA_MAXBLOCKSIZE) return (SET_ERROR(EINVAL)); - data = restore_read(ra, drrs->drr_length); + data = restore_read(ra, drrs->drr_length, NULL); if (data == NULL) return (ra->err); @@ -1735,7 +1747,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, */ pcksum = ra.cksum; while (ra.err == 0 && - NULL != (drr = restore_read(&ra, sizeof (*drr)))) { + NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) { if (issig(JUSTLOOKING) && issig(FORREAL)) { ra.err = SET_ERROR(EINTR); goto out;