Add VIRTIO_BLK_T_DISCARD support to the virtio-blk driver

If the hypervisor advertises support for the DISCARD command then the
guest can perform TRIM commands, freeing space on the backing store.

If VIRTIO_BLK_F_DISCARD is enabled, advertise DISKFLAG_CANDELETE.

Tested with FreeBSD guests on bhyve and KVM.

Reviewed by:	jhb
Tested by:	freqlabs
MFC after:	1 month
Relnotes:	yes
Sponsored by:	Klara Inc.
Differential Revision:	https://reviews.freebsd.org/D21708
This commit is contained in:
Allan Jude 2020-07-16 16:32:16 +00:00
parent d272016da4
commit 2cc8a524af
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=363255
2 changed files with 119 additions and 32 deletions

View File

@ -81,6 +81,7 @@ struct vtblk_softc {
#define VTBLK_FLAG_SUSPEND 0x0008
#define VTBLK_FLAG_BARRIER 0x0010
#define VTBLK_FLAG_WC_CONFIG 0x0020
#define VTBLK_FLAG_DISCARD 0x0040
struct virtqueue *vtblk_vq;
struct sglist *vtblk_sglist;
@ -112,6 +113,7 @@ static struct virtio_feature_desc vtblk_feature_desc[] = {
{ VIRTIO_BLK_F_WCE, "WriteCache" },
{ VIRTIO_BLK_F_TOPOLOGY, "Topology" },
{ VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" },
{ VIRTIO_BLK_F_DISCARD, "Discard" },
{ 0, NULL }
};
@ -210,6 +212,7 @@ TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
VIRTIO_BLK_F_WCE | \
VIRTIO_BLK_F_TOPOLOGY | \
VIRTIO_BLK_F_CONFIG_WCE | \
VIRTIO_BLK_F_DISCARD | \
VIRTIO_RING_F_INDIRECT_DESC)
#define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx
@ -459,7 +462,7 @@ vtblk_config_change(device_t dev)
vtblk_read_config(sc, &blkcfg);
/* Capacity is always in 512-byte units. */
capacity = blkcfg.capacity * 512;
capacity = blkcfg.capacity * VTBLK_BSIZE;
if (sc->vtblk_disk->d_mediasize != capacity)
vtblk_resize_disk(sc, capacity);
@ -544,13 +547,14 @@ vtblk_strategy(struct bio *bp)
* be a better way to report our readonly'ness to GEOM above.
*/
if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
(bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
(bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH ||
bp->bio_cmd == BIO_DELETE)) {
vtblk_bio_done(sc, bp, EROFS);
return;
}
if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
(bp->bio_cmd != BIO_FLUSH)) {
(bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
vtblk_bio_done(sc, bp, EOPNOTSUPP);
return;
}
@ -563,6 +567,13 @@ vtblk_strategy(struct bio *bp)
return;
}
if ((bp->bio_cmd == BIO_DELETE) &&
!(sc->vtblk_flags & VTBLK_FLAG_DISCARD)) {
VTBLK_UNLOCK(sc);
vtblk_bio_done(sc, bp, EOPNOTSUPP);
return;
}
bioq_insert_tail(&sc->vtblk_bioq, bp);
vtblk_startio(sc);
@ -598,6 +609,8 @@ vtblk_setup_features(struct vtblk_softc *sc)
sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD))
sc->vtblk_flags |= VTBLK_FLAG_DISCARD;
}
static int
@ -687,12 +700,12 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
dp->d_dump = vtblk_dump;
/* Capacity is always in 512-byte units. */
dp->d_mediasize = blkcfg->capacity * 512;
dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
dp->d_sectorsize = blkcfg->blk_size;
else
dp->d_sectorsize = 512;
dp->d_sectorsize = VTBLK_BSIZE;
/*
* The VirtIO maximum I/O size is given in terms of segments.
@ -726,6 +739,11 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
dp->d_stripesize;
}
if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
dp->d_flags |= DISKFLAG_CANDELETE;
dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
}
if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
else
@ -876,11 +894,15 @@ vtblk_request_bio(struct vtblk_softc *sc)
break;
case BIO_READ:
req->vbr_hdr.type = VIRTIO_BLK_T_IN;
req->vbr_hdr.sector = bp->bio_offset / 512;
req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
break;
case BIO_WRITE:
req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
req->vbr_hdr.sector = bp->bio_offset / 512;
req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
break;
case BIO_DELETE:
req->vbr_hdr.type = VIRTIO_BLK_T_DISCARD;
req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE;
break;
default:
panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
@ -935,6 +957,20 @@ vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
/* BIO_READ means the host writes into our buffer. */
if (bp->bio_cmd == BIO_READ)
writable = sg->sg_nseg - 1;
} else if (bp->bio_cmd == BIO_DELETE) {
struct virtio_blk_discard_write_zeroes *discard;
discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
if (discard == NULL)
return (ENOMEM);
discard->sector = bp->bio_offset / VTBLK_BSIZE;
discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE;
bp->bio_driver1 = discard;
error = sglist_append(sg, discard, sizeof(*discard));
if (error || sg->sg_nseg == sg->sg_maxseg) {
panic("%s: bio %p data buffer too big %d",
__func__, bp, error);
}
}
writable++;
@ -1095,6 +1131,11 @@ vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
bp->bio_flags |= BIO_ERROR;
}
if (bp->bio_driver1 != NULL) {
free(bp->bio_driver1, M_DEVBUF);
bp->bio_driver1 = NULL;
}
biodone(bp);
}
@ -1124,7 +1165,12 @@ vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
blkcfg);
}
#undef VTBLK_GET_CONFIG
@ -1282,7 +1328,7 @@ vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
req->vbr_ack = -1;
req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
req->vbr_hdr.ioprio = 1;
req->vbr_hdr.sector = offset / 512;
req->vbr_hdr.sector = offset / VTBLK_BSIZE;
req->vbr_bp = &buf;
g_reset_bio(&buf);
@ -1331,7 +1377,7 @@ vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
/* Set either writeback (1) or writethrough (0) mode. */
virtio_write_dev_config_1(sc->vtblk_dev,
offsetof(struct virtio_blk_config, writeback), wc);
offsetof(struct virtio_blk_config, wce), wc);
}
static int
@ -1346,7 +1392,7 @@ vtblk_write_cache_enabled(struct vtblk_softc *sc,
if (wc >= 0 && wc < VTBLK_CACHE_MAX)
vtblk_set_write_cache(sc, wc);
else
wc = blkcfg->writeback;
wc = blkcfg->wce;
} else
wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

View File

@ -33,19 +33,26 @@
#ifndef _VIRTIO_BLK_H
#define _VIRTIO_BLK_H
/* Feature bits */
#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */
#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */
#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */
#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */
#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/
#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_WCE 0x0200 /* Writeback mode enabled after reset */
#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */
#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */
#define VTBLK_BSIZE 512
#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */
/* Feature bits */
#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */
#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */
#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */
#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */
#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/
#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */
#define VIRTIO_BLK_F_FLUSH 0x0200 /* Flush command supported */
#define VIRTIO_BLK_F_WCE 0x0200 /* Legacy alias for FLUSH */
#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */
#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */
#define VIRTIO_BLK_F_MQ 0x1000 /* Support more than one vq */
#define VIRTIO_BLK_F_DISCARD 0x2000 /* Trim blocks */
#define VIRTIO_BLK_F_WRITE_ZEROES 0x4000 /* Write zeros */
#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */
struct virtio_blk_config {
/* The capacity (in 512-byte sectors). */
@ -66,15 +73,29 @@ struct virtio_blk_config {
/* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */
struct virtio_blk_topology {
/* Exponent for physical block per logical block. */
uint8_t physical_block_exp;
/* Alignment offset in logical blocks. */
uint8_t alignment_offset;
/* Minimum I/O size without performance penalty in logical
* blocks. */
uint16_t min_io_size;
/* Optimal sustained I/O size in logical blocks. */
uint32_t opt_io_size;
} topology;
/* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
uint8_t writeback;
uint8_t wce;
uint8_t unused;
/* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */
uint16_t num_queues;
uint32_t max_discard_sectors;
uint32_t max_discard_seg;
uint32_t discard_sector_alignment;
uint32_t max_write_zeroes_sectors;
uint32_t max_write_zeroes_seg;
uint8_t write_zeroes_may_unmap;
uint8_t unused1[3];
} __packed;
/*
@ -89,23 +110,34 @@ struct virtio_blk_config {
*/
/* These two define direction. */
#define VIRTIO_BLK_T_IN 0
#define VIRTIO_BLK_T_OUT 1
#define VIRTIO_BLK_T_IN 0
#define VIRTIO_BLK_T_OUT 1
/* This bit says it's a scsi command, not an actual read or write. */
#define VIRTIO_BLK_T_SCSI_CMD 2
#define VIRTIO_BLK_T_SCSI_CMD 2
#define VIRTIO_BLK_T_SCSI_CMD_OUT 3
/* Cache flush command */
#define VIRTIO_BLK_T_FLUSH 4
#define VIRTIO_BLK_T_FLUSH 4
#define VIRTIO_BLK_T_FLUSH_OUT 5
/* Get device ID command */
#define VIRTIO_BLK_T_GET_ID 8
#define VIRTIO_BLK_T_GET_ID 8
/* Discard command */
#define VIRTIO_BLK_T_DISCARD 11
/* Write zeros command */
#define VIRTIO_BLK_T_WRITE_ZEROES 13
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
#define VIRTIO_BLK_T_BARRIER 0x80000000
/* ID string length */
#define VIRTIO_BLK_ID_BYTES 20
#define VIRTIO_BLK_ID_BYTES 20
/* Unmap this range (only valid for write zeroes command) */
#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001
/* This is the first element of the read scatter-gather list. */
struct virtio_blk_outhdr {
@ -117,6 +149,15 @@ struct virtio_blk_outhdr {
uint64_t sector;
};
/*
 * Describes one sector range for a discard / write-zeroes request.
 * The vtblk driver allocates one of these per BIO_DELETE and appends it to
 * the request's scatter-gather list for a VIRTIO_BLK_T_DISCARD command.
 */
struct virtio_blk_discard_write_zeroes {
/* First sector of the range; vtblk sets it to bio_offset / VTBLK_BSIZE. */
uint64_t sector;
/* Range length in sectors; vtblk sets it to bio_bcount / VTBLK_BSIZE. */
uint32_t num_sectors;
/*
 * NOTE(review): bit-field placement inside the 32-bit word is
 * implementation-defined in C, so the bit that `unmap` occupies depends on
 * the ABI -- confirm it matches what the device expects on every target.
 */
struct {
/*
 * Presumably mirrors VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP, which is
 * documented as only valid for the write zeroes command -- confirm.
 */
uint32_t unmap:1;
/* Remaining 31 bits; vtblk leaves them zero (M_ZERO allocation). */
uint32_t reserved:31;
} flags;
};
struct virtio_scsi_inhdr {
uint32_t errors;
uint32_t data_len;