Limit stripesize reported from nvd(4) to 4K
Intel NVMe controllers have a slow path for I/Os that span a 128KB stripe boundary but ZFS limits ashift, which is derived from d_stripesize, to 13 (8KB) so we limit the stripesize reported to geom(8) to 4KB. This may result in a small number of additional I/Os requiring splitting in nvme(4); however, the NVMe I/O path is very efficient so these additional I/Os will cause very minimal (if any) difference in performance or CPU utilisation. This can be controlled by the new sysctl kern.nvme.max_optimal_sectorsize. MFC after: 1 week Sponsored by: Multiplay Differential Revision: https://reviews.freebsd.org/D4446
This commit is contained in:
parent
777d81af62
commit
fdf16a68ab
@ -279,7 +279,7 @@ nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
|
||||
disk->d_sectorsize = nvme_ns_get_sector_size(ns);
|
||||
disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
|
||||
disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
|
||||
disk->d_stripesize = nvme_ns_get_stripesize(ns);
|
||||
disk->d_stripesize = nvme_ns_get_optimal_sector_size(ns);
|
||||
|
||||
if (TAILQ_EMPTY(&disk_head))
|
||||
disk->d_unit = 0;
|
||||
|
@ -870,6 +870,7 @@ const char * nvme_ns_get_serial_number(struct nvme_namespace *ns);
|
||||
const char * nvme_ns_get_model_number(struct nvme_namespace *ns);
|
||||
const struct nvme_namespace_data *
|
||||
nvme_ns_get_data(struct nvme_namespace *ns);
|
||||
uint32_t nvme_ns_get_optimal_sector_size(struct nvme_namespace *ns);
|
||||
uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns);
|
||||
|
||||
int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
|
||||
|
@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include "nvme_private.h"
|
||||
|
||||
extern int nvme_max_optimal_sectorsize;
|
||||
|
||||
static void nvme_bio_child_inbed(struct bio *parent, int bio_error);
|
||||
static void nvme_bio_child_done(void *arg,
|
||||
const struct nvme_completion *cpl);
|
||||
@ -217,6 +219,22 @@ nvme_ns_get_stripesize(struct nvme_namespace *ns)
|
||||
return (ns->stripesize);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
nvme_ns_get_optimal_sector_size(struct nvme_namespace *ns)
|
||||
{
|
||||
uint32_t stripesize;
|
||||
|
||||
stripesize = nvme_ns_get_stripesize(ns);
|
||||
|
||||
if (stripesize == 0)
|
||||
return nvme_ns_get_sector_size(ns);
|
||||
|
||||
if (nvme_max_optimal_sectorsize == 0)
|
||||
return (stripesize);
|
||||
|
||||
return (MIN(stripesize, nvme_max_optimal_sectorsize));
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
|
||||
{
|
||||
|
@ -33,6 +33,22 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include "nvme_private.h"
|
||||
|
||||
SYSCTL_NODE(_kern, OID_AUTO, nvme, CTLFLAG_RD, 0, "NVM Express");
|
||||
/*
|
||||
* Intel NVMe controllers have a slow path for I/Os that span a 128KB
|
||||
* stripe boundary but ZFS limits ashift, which is derived from
|
||||
* d_stripesize, to 13 (8KB) so we limit the stripesize reported to
|
||||
* geom(8) to 4KB by default.
|
||||
*
|
||||
* This may result in a small number of additional I/Os to require
|
||||
* splitting in nvme(4), however the NVMe I/O path is very efficient
|
||||
* so these additional I/Os will cause very minimal (if any) difference
|
||||
* in performance or CPU utilisation.
|
||||
*/
|
||||
int nvme_max_optimal_sectorsize = 1<<12;
|
||||
SYSCTL_INT(_kern_nvme, OID_AUTO, max_optimal_sectorsize, CTLFLAG_RWTUN,
|
||||
&nvme_max_optimal_sectorsize, 0, "The maximum optimal sectorsize reported");
|
||||
|
||||
/*
|
||||
* CTLTYPE_S64 and sysctl_handle_64 were added in r217616. Define these
|
||||
* explicitly here for older kernels that don't include the r217616
|
||||
|
Loading…
x
Reference in New Issue
Block a user