From b32cfc467b3f3a6a7bd00631334612f6181f7c08 Mon Sep 17 00:00:00 2001 From: Swapnil Ingle Date: Fri, 9 Apr 2021 12:18:41 -0400 Subject: [PATCH] nvmf: Support physical block size if exposed by bdev Today the in-guest nvme device shows physical_block_size=512 even though the backend iSCSI bdev supports physical_block_size=4K iSCSI targets exposes physical block size using logical_block_per_physical_block_exponent in READ_CAPACITY_16 NPWG is one of the way to let Linux nvme driver set physical_block_size of the nvme block device. This patch adds spdk_bdev.phys_blocklen which is updated if the iSCSI backend exposes physical_block_size. Later phys_blocklen is used in nvmf to set NPWG and NAWUPF to report back during NS identity. Linux driver uses min(nawupf, npwg) to set physical_block_size. Similarly in scsi_bdev fill lbppbe in READ_CAP16 response based on spdk_bdev.phys_blocklen. Fixes #1884 Signed-off-by: Swapnil Ingle Change-Id: I0b6c81f1937e346d448f49c927eda8c79d2d75cf Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7310 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Reviewed-by: Jim Harris Reviewed-by: Ben Walker Reviewed-by: Tested-by: SPDK CI Jenkins --- examples/nvme/identify/identify.c | 4 ++++ include/spdk/bdev.h | 8 ++++++++ include/spdk/bdev_module.h | 3 +++ lib/bdev/bdev.c | 10 ++++++++++ lib/bdev/spdk_bdev.map | 1 + lib/nvmf/ctrlr_bdev.c | 10 ++++++++++ lib/scsi/scsi_bdev.c | 7 +++++++ module/bdev/iscsi/bdev_iscsi.c | 7 +++++-- test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c | 3 +++ test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c | 3 +++ 10 files changed, 54 insertions(+), 2 deletions(-) diff --git a/examples/nvme/identify/identify.c b/examples/nvme/identify/identify.c index f6ce6b73de..20a0d2d684 100644 --- a/examples/nvme/identify/identify.c +++ b/examples/nvme/identify/identify.c @@ -1014,6 +1014,10 @@ print_namespace(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns) printf(" Atomic Write Unit (PFail): %d\n", nsdata->nawupf + 1); } + if (nsdata->npwg) { + printf(" Preferred Write Granularity: %d\n", nsdata->npwg + 1); + } + if (nsdata->nacwu) { printf(" Atomic Compare & Write Unit: %d\n", nsdata->nacwu + 1); } diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 72f673b047..19b265735f 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -590,6 +590,14 @@ bool spdk_bdev_is_zoned(const struct spdk_bdev *bdev); */ uint32_t spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev); +/** + * Get block device physical block size. + * + * \param bdev Block device to query. + * \return Size of physical block size for this bdev in bytes. + */ +uint32_t spdk_bdev_get_physical_block_size(const struct spdk_bdev *bdev); + /** * Get DIF type of the block device. * diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index ac7739625b..70c37f4974 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -265,6 +265,9 @@ struct spdk_bdev { /** Size in bytes of a logical block for the backend */ uint32_t blocklen; + /** Size in bytes of a physical block for the backend */ + uint32_t phys_blocklen; + /** Number of blocks */ uint64_t blockcnt; diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 9c741d0016..9ede230c08 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -3291,6 +3291,12 @@ spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev) } } +uint32_t +spdk_bdev_get_physical_block_size(const struct spdk_bdev *bdev) +{ + return bdev->phys_blocklen; +} + static uint32_t _bdev_get_block_size_with_md(const struct spdk_bdev *bdev) { @@ -5464,6 +5470,10 @@ bdev_init(struct spdk_bdev *bdev) bdev->acwu = 1; } + if (bdev->phys_blocklen == 0) { + bdev->phys_blocklen = spdk_bdev_get_data_block_size(bdev); + } + TAILQ_INIT(&bdev->internal.open_descs); TAILQ_INIT(&bdev->internal.locked_ranges); TAILQ_INIT(&bdev->internal.pending_locked_ranges); diff --git a/lib/bdev/spdk_bdev.map b/lib/bdev/spdk_bdev.map index aec215d589..91d21d5db6 100644 --- a/lib/bdev/spdk_bdev.map +++ b/lib/bdev/spdk_bdev.map @@ -40,6 +40,7 @@ spdk_bdev_is_md_separate; spdk_bdev_is_zoned; spdk_bdev_get_data_block_size; + spdk_bdev_get_physical_block_size; spdk_bdev_get_dif_type; spdk_bdev_is_dif_head_of_md; spdk_bdev_is_dif_check_enabled; diff --git a/lib/nvmf/ctrlr_bdev.c b/lib/nvmf/ctrlr_bdev.c index a631b80033..b79254f798 100644 --- a/lib/nvmf/ctrlr_bdev.c +++ b/lib/nvmf/ctrlr_bdev.c @@ -140,6 +140,7 @@ nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *n { struct spdk_bdev *bdev = ns->bdev; uint64_t num_blocks; + uint32_t phys_blocklen; num_blocks = spdk_bdev_get_num_blocks(bdev); @@ -181,6 +182,15 @@ nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *n nsdata->lbaf[0].ms = 0; nsdata->lbaf[0].lbads = spdk_u32log2(spdk_bdev_get_data_block_size(bdev)); } + + phys_blocklen = spdk_bdev_get_physical_block_size(bdev); + assert(phys_blocklen > 0); + /* Linux driver uses min(nawupf, npwg) to set physical_block_size */ + nsdata->nsfeat.optperf = 1; + nsdata->nsfeat.ns_atomic_write_unit = 1; + nsdata->npwg = (phys_blocklen >> nsdata->lbaf[0].lbads) - 1; + nsdata->nawupf = nsdata->npwg; + nsdata->noiob = spdk_bdev_get_optimal_io_boundary(bdev); nsdata->nmic.can_share = 1; if (ns->ptpl_file != NULL) { diff --git a/lib/scsi/scsi_bdev.c b/lib/scsi/scsi_bdev.c index 06270eb5d1..ba890af741 100644 --- a/lib/scsi/scsi_bdev.c +++ b/lib/scsi/scsi_bdev.c @@ -1620,9 +1620,16 @@ bdev_scsi_process_block(struct spdk_scsi_task *task) switch (cdb[1] & 0x1f) { /* SERVICE ACTION */ case SPDK_SBC_SAI_READ_CAPACITY_16: { uint8_t buffer[32] = {0}; + uint32_t lbppb; to_be64(&buffer[0], spdk_bdev_get_num_blocks(bdev) - 1); to_be32(&buffer[8], spdk_bdev_get_data_block_size(bdev)); + lbppb = spdk_bdev_get_physical_block_size(bdev) / spdk_bdev_get_data_block_size(bdev); + if (spdk_u32log2(lbppb) > 0xf) { + SPDK_ERRLOG("lbppbe(0x%x) > 0xf\n", spdk_u32log2(lbppb)); + } else { + buffer[13] = spdk_u32log2(lbppb); + } /* * Set the TPE bit to 1 to indicate thin provisioning. * The position of TPE bit is the 7th bit in 14th byte diff --git a/module/bdev/iscsi/bdev_iscsi.c b/module/bdev/iscsi/bdev_iscsi.c index 7516ea95e4..edb5373458 100644 --- a/module/bdev/iscsi/bdev_iscsi.c +++ b/module/bdev/iscsi/bdev_iscsi.c @@ -623,7 +623,8 @@ static const struct spdk_bdev_fn_table iscsi_fn_table = { static int create_iscsi_lun(struct iscsi_context *context, int lun_id, char *url, char *initiator_iqn, char *name, - uint64_t num_blocks, uint32_t block_size, struct spdk_bdev **bdev, bool unmap_supported) + uint64_t num_blocks, uint32_t block_size, struct spdk_bdev **bdev, bool unmap_supported, + uint8_t lbppbe) { struct bdev_iscsi_lun *lun; int rc; @@ -645,6 +646,7 @@ create_iscsi_lun(struct iscsi_context *context, int lun_id, char *url, char *ini lun->bdev.product_name = "iSCSI LUN"; lun->bdev.module = &g_iscsi_bdev_module; lun->bdev.blocklen = block_size; + lun->bdev.phys_blocklen = block_size * (1 << lbppbe); lun->bdev.blockcnt = num_blocks; lun->bdev.ctxt = lun; lun->unmap_supported = unmap_supported; @@ -691,7 +693,8 @@ iscsi_readcapacity16_cb(struct iscsi_context *iscsi, int status, } status = create_iscsi_lun(req->context, req->lun, req->url, req->initiator_iqn, req->bdev_name, - readcap16->returned_lba + 1, readcap16->block_length, &bdev, req->unmap_supported); + readcap16->returned_lba + 1, readcap16->block_length, &bdev, req->unmap_supported, + readcap16->lbppbe); if (status) { SPDK_ERRLOG("Unable to create iscsi bdev: %s (%d)\n", spdk_strerror(-status), status); } diff --git a/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c b/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c index e3801b57d4..7edb46dfc5 100644 --- a/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c +++ b/test/unit/lib/nvmf/ctrlr_bdev.c/ctrlr_bdev_ut.c @@ -51,6 +51,9 @@ DEFINE_STUB(spdk_bdev_get_acwu, uint16_t, (const struct spdk_bdev *bdev), 0); DEFINE_STUB(spdk_bdev_get_data_block_size, uint32_t, (const struct spdk_bdev *bdev), 512); +DEFINE_STUB(spdk_bdev_get_physical_block_size, uint32_t, + (const struct spdk_bdev *bdev), 4096); + DEFINE_STUB(nvmf_ctrlr_process_admin_cmd, int, (struct spdk_nvmf_request *req), 0); DEFINE_STUB(spdk_bdev_comparev_blocks, int, (struct spdk_bdev_desc *desc, diff --git a/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c b/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c index 87485b901c..3e1705477e 100644 --- a/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c +++ b/test/unit/lib/scsi/scsi_bdev.c/scsi_bdev_ut.c @@ -77,6 +77,9 @@ DEFINE_STUB(spdk_bdev_is_md_interleaved, bool, DEFINE_STUB(spdk_bdev_get_data_block_size, uint32_t, (const struct spdk_bdev *bdev), 512); +DEFINE_STUB(spdk_bdev_get_physical_block_size, uint32_t, + (const struct spdk_bdev *bdev), 4096); + uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) {