9539 Make zvol operations use _by_dnode routines

Continues what was started in "7801 add more by-dnode routines" by fully
converting zvols to avoid unnecessary dnode_hold() calls. This saves a small
amount of CPU time and slightly improves the latency of operations on zvols.

illumos/illumos-gate@8dfe5547fb

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Rick McNeal <rick.mcneal@nexenta.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Author: Richard Yao <richard.yao@prophetstor.com>
parent 903252254b
commit 8398b8e966
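The conversion follows a single pattern throughout the diff: the (objset, object)-addressed DMU entry points, each of which performs a dnode_hold()/dnode_rele() round trip internally, are replaced by the _by_dnode variants operating on a dnode the zvol holds for as long as it is open. A minimal sketch of that shape, condensed from the hunks below (not literal kernel code; error paths, locking, and the rest of the zvol state machine are elided):

/*
 * Before: each I/O names (objset, object) and the DMU re-resolves the
 * dnode internally -- one dnode_hold()/dnode_rele() pair per call.
 */
error = dmu_read(zv->zv_objset, ZVOL_OBJ, offset, size, buf,
    DMU_READ_NO_PREFETCH);

/* After: zvol_first_open() takes a single long-lived hold ... */
error = dnode_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dn);

/* ... every data path reuses it via the _by_dnode variants ... */
error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
    DMU_READ_NO_PREFETCH);

/* ... and zvol_last_close() drops it. */
dnode_rele(zv->zv_dn, zvol_tag);
zv->zv_dn = NULL;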
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -443,7 +443,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
  * and can induce severe lock contention when writing to several files
  * whose dnodes are in the same block.
  */
-static int
+int
 dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
     boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
@@ -1302,7 +1302,7 @@ xuio_stat_wbuf_nocopy(void)
 }
 
 #ifdef _KERNEL
-static int
+int
 dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
 {
 	dmu_buf_t **dbp;
@@ -1411,7 +1411,7 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
 	return (err);
 }
 
-static int
+int
 dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 {
 	dmu_buf_t **dbp;
@@ -1600,22 +1600,17 @@ dmu_return_arcbuf(arc_buf_t *buf)
  * dmu_write().
  */
 void
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+dmu_assign_arcbuf_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
     dmu_tx_t *tx)
 {
-	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
-	dnode_t *dn;
 	dmu_buf_impl_t *db;
 	uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
 	uint64_t blkid;
 
-	DB_DNODE_ENTER(dbuf);
-	dn = DB_DNODE(dbuf);
 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 	blkid = dbuf_whichblock(dn, 0, offset);
 	VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
 	rw_exit(&dn->dn_struct_rwlock);
-	DB_DNODE_EXIT(dbuf);
 
 	/*
 	 * We can only assign if the offset is aligned, the arc buf is the
@@ -1632,11 +1627,8 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
 		ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
 		ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
 
-		DB_DNODE_ENTER(dbuf);
-		dn = DB_DNODE(dbuf);
 		os = dn->dn_objset;
 		object = dn->dn_object;
-		DB_DNODE_EXIT(dbuf);
 
 		dbuf_rele(db, FTAG);
 		dmu_write(os, object, offset, blksz, buf->b_data, tx);
@@ -1645,6 +1637,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
 	}
 }
 
+void
+dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+    dmu_tx_t *tx)
+{
+	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
+
+	DB_DNODE_ENTER(dbuf);
+	dmu_assign_arcbuf_dnode(DB_DNODE(dbuf), offset, buf, tx);
+	DB_DNODE_EXIT(dbuf);
+}
+
 typedef struct {
 	dbuf_dirty_record_t *dsa_dr;
 	dmu_sync_cb_t *dsa_done;
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -517,6 +517,9 @@ uint64_t dmu_buf_refcount(dmu_buf_t *db);
 int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
     uint64_t length, boolean_t read, void *tag,
     int *numbufsp, dmu_buf_t ***dbpp);
+int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
+    boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp,
+    uint32_t flags);
 void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
 
 typedef void dmu_buf_evict_func_t(void *user_ptr);
@@ -755,14 +758,19 @@ void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
     dmu_tx_t *tx);
 int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
 int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size);
+int dmu_read_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size);
 int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
     dmu_tx_t *tx);
 int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
     dmu_tx_t *tx);
+int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size,
+    dmu_tx_t *tx);
 int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, struct page *pp, dmu_tx_t *tx);
 struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
 void dmu_return_arcbuf(struct arc_buf *buf);
+void dmu_assign_arcbuf_dnode(dnode_t *handle, uint64_t offset,
+    struct arc_buf *buf, dmu_tx_t *tx);
 void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
     dmu_tx_t *tx);
 int dmu_xuio_init(struct xuio *uio, int niov);
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -128,7 +128,7 @@ typedef struct zvol_state {
 	zilog_t		*zv_zilog;	/* ZIL handle */
 	list_t		zv_extents;	/* List of extents for dump */
 	znode_t		zv_znode;	/* for range locking */
-	dmu_buf_t	*zv_dbuf;	/* bonus handle */
+	dnode_t		*zv_dn;		/* dnode hold */
 } zvol_state_t;
 
 /*
@@ -646,7 +646,7 @@ zvol_first_open(zvol_state_t *zv)
 		return (error);
 	}
 
-	error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
+	error = dnode_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dn);
 	if (error) {
 		dmu_objset_disown(os, zvol_tag);
 		return (error);
@@ -671,8 +671,8 @@ zvol_last_close(zvol_state_t *zv)
 	zil_close(zv->zv_zilog);
 	zv->zv_zilog = NULL;
 
-	dmu_buf_rele(zv->zv_dbuf, zvol_tag);
-	zv->zv_dbuf = NULL;
+	dnode_rele(zv->zv_dn, zvol_tag);
+	zv->zv_dn = NULL;
 
 	/*
 	 * Evict cached data
@@ -993,8 +993,6 @@ static int
 zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
 {
 	zvol_state_t *zv = arg;
-	objset_t *os = zv->zv_objset;
-	uint64_t object = ZVOL_OBJ;
 	uint64_t offset = lr->lr_offset;
 	uint64_t size = lr->lr_length;	/* length of user data */
 	dmu_buf_t *db;
@@ -1018,7 +1016,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
 	if (buf != NULL) {	/* immediate write */
 		zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size,
 		    RL_READER);
-		error = dmu_read(os, object, offset, size, buf,
+		error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
 		    DMU_READ_NO_PREFETCH);
 	} else {	/* indirect write */
 		/*
@@ -1031,7 +1029,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
 		offset = P2ALIGN(offset, size);
 		zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size,
 		    RL_READER);
-		error = dmu_buf_hold(os, object, offset, zgd, &db,
+		error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
 		    DMU_READ_NO_PREFETCH);
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;
@@ -1098,8 +1096,8 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
 		itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
 		    (wr_state == WR_COPIED ? len : 0));
 		lr = (lr_write_t *)&itx->itx_lr;
-		if (wr_state == WR_COPIED && dmu_read(zv->zv_objset,
-		    ZVOL_OBJ, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
+		if (wr_state == WR_COPIED && dmu_read_by_dnode(zv->zv_dn,
+		    off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
 			zil_itx_destroy(itx);
 			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
 			lr = (lr_write_t *)&itx->itx_lr;
@@ -1468,7 +1466,7 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
 			dmu_tx_abort(tx);
 			break;
 		}
-		error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
+		error = dmu_write_uio_dnode(zv->zv_dn, uio, bytes, tx);
 		if (error == 0)
 			zvol_log_write(zv, tx, off, bytes, sync);
 		dmu_tx_commit(tx);
@@ -1549,7 +1547,7 @@ zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
 int
 zvol_get_volume_params(minor_t minor, uint64_t *blksize,
     uint64_t *max_xfer_len, void **minor_hdl, void **objset_hdl, void **zil_hdl,
-    void **rl_hdl, void **bonus_hdl)
+    void **rl_hdl, void **dnode_hdl)
 {
 	zvol_state_t *zv;
 
@@ -1560,7 +1558,7 @@ zvol_get_volume_params(minor_t minor, uint64_t *blksize,
 		return (SET_ERROR(ENXIO));
 
 	ASSERT(blksize && max_xfer_len && minor_hdl &&
-	    objset_hdl && zil_hdl && rl_hdl && bonus_hdl);
+	    objset_hdl && zil_hdl && rl_hdl && dnode_hdl);
 
 	*blksize = zv->zv_volblocksize;
 	*max_xfer_len = (uint64_t)zvol_maxphys;
@@ -1568,7 +1566,7 @@ zvol_get_volume_params(minor_t minor, uint64_t *blksize,
 	*objset_hdl = zv->zv_objset;
 	*zil_hdl = zv->zv_zilog;
 	*rl_hdl = &zv->zv_znode;
-	*bonus_hdl = zv->zv_dbuf;
+	*dnode_hdl = zv->zv_dn;
 	return (0);
 }
 
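For reference, the converted write path ends up stringing the pieces together roughly as follows, condensed from the zvol_write() hunk above (a sketch only: the real function also takes the range lock, handles the sync/async decision, and loops over the uio):

/* One transaction per chunk; the held dnode replaces the bonus-dbuf handle. */
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
	dmu_tx_abort(tx);
} else {
	error = dmu_write_uio_dnode(zv->zv_dn, uio, bytes, tx);
	if (error == 0)
		zvol_log_write(zv, tx, off, bytes, sync);
	dmu_tx_commit(tx);
}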