Add BIO_DELETE support to ZVOL.

It is an adapted merge from the vendor branch of:
    701 UNMAP support for COMSTAR  (in part related to ZFS)
    2130 zvol DKIOCFREE uses nested DMU transactions
This commit is contained in:
Alexander Motin 2014-04-03 15:04:32 +00:00
parent 3b485e461d
commit 4a03e8b64d

View File

@ -25,9 +25,12 @@
* All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*
* Portions Copyright 2010 Robert Milkowski
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
/* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
/*
@ -153,6 +156,8 @@ int zvol_maxphys = DMU_MAX_ACCESS/2;
extern int zfs_set_prop_nvlist(const char *, zprop_source_t,
nvlist_t *, nvlist_t *);
static void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
uint64_t len, boolean_t sync);
static int zvol_remove_zv(zvol_state_t *);
static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
static int zvol_dumpify(zvol_state_t *zv);
@ -385,6 +390,24 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
ASSERT(error == 0);
}
/*
* Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
* implement DKIOCFREE/free-long-range.
*/
static int
zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
{
uint64_t offset, length;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
offset = lr->lr_offset;
length = lr->lr_length;
return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
}
/*
* Replay a TX_WRITE ZIL transaction that didn't get committed
* after a system failure
@ -435,7 +458,7 @@ zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
/*
* Callback vectors for replaying records.
* Only TX_WRITE is needed for zvol.
* Only TX_WRITE and TX_TRUNCATE are needed for zvol.
*/
zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* 0 no such transaction type */
@ -448,7 +471,7 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_LINK */
zvol_replay_err, /* TX_RENAME */
zvol_replay_write, /* TX_WRITE */
zvol_replay_err, /* TX_TRUNCATE */
zvol_replay_truncate, /* TX_TRUNCATE */
zvol_replay_err, /* TX_SETATTR */
zvol_replay_err, /* TX_ACL */
zvol_replay_err, /* TX_CREATE_ACL */
@ -1316,6 +1339,21 @@ zvol_strategy(struct bio *bp)
rl = zfs_range_lock(&zv->zv_znode, off, resid,
doread ? RL_READER : RL_WRITER);
if (bp->bio_cmd == BIO_DELETE) {
dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error != 0) {
dmu_tx_abort(tx);
} else {
zvol_log_truncate(zv, tx, off, resid, B_TRUE);
dmu_tx_commit(tx);
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
off, resid);
resid = 0;
}
goto unlock;
}
while (resid != 0 && off < volsize) {
size_t size = MIN(resid, zvol_maxphys);
#ifdef illumos
@ -1351,6 +1389,7 @@ zvol_strategy(struct bio *bp)
addr += size;
resid -= size;
}
unlock:
zfs_range_unlock(rl);
bp->bio_completed = bp->bio_length - resid;
@ -1648,9 +1687,36 @@ zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
/*
* END entry points to allow external callers access to the volume.
*/
#endif /* sun */
/*
* Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
*/
static void
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
boolean_t sync)
{
itx_t *itx;
lr_truncate_t *lr;
zilog_t *zilog = zv->zv_zilog;
if (zil_replaying(zilog, tx))
return;
itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
lr = (lr_truncate_t *)&itx->itx_lr;
lr->lr_foid = ZVOL_OBJ;
lr->lr_offset = off;
lr->lr_length = len;
itx->itx_sync = sync;
zil_itx_assign(zilog, itx, tx);
}
#ifdef sun
/*
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
* Also a dirtbag dkio ioctl for unmap/free-block functionality.
*/
/*ARGSUSED*/
int
@ -2271,12 +2337,12 @@ zvol_geom_start(struct bio *bp)
break;
case BIO_READ:
case BIO_WRITE:
case BIO_DELETE:
if (!THREAD_CAN_SLEEP())
goto enqueue;
zvol_strategy(bp);
break;
case BIO_GETATTR:
case BIO_DELETE:
default:
g_io_deliver(bp, EOPNOTSUPP);
break;