nvd: add hw.nvd.delete_max tunable

The NVMe specification does not define a maximum or optimal delete
size, so technically the max delete size is min(full size of namespace,
2^32 - 1 LBAs).  However, a single delete operation for a multi-TB NVMe
namespace may take much longer to complete than the nvme(4) I/O
timeout period, so choose a sensible default here that is still
suitably large to minimize the number of overall delete operations.

This also fixes possible uint32_t overflow on initial TRIM operation
for zpool create operations for NVMe namespaces with >4G LBAs.

MFC after:	3 days
Sponsored by:	Intel
This commit is contained in:
Jim Harris 2016-01-28 23:15:14 +00:00
parent a4cab32319
commit aeae6079b4
2 changed files with 28 additions and 3 deletions

View File

@ -1,5 +1,5 @@
.\"
.\" Copyright (c) 2012-2014 Intel Corporation
.\" Copyright (c) 2012-2016 Intel Corporation
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
@ -33,7 +33,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd March 18, 2014
.Dd January 28, 2016
.Dt NVD 4
.Os
.Sh NAME
@ -74,6 +74,15 @@ Note that device nodes from the
driver are not
.Xr GEOM 4
disks and cannot be partitioned.
.Sh CONFIGURATION
The
.Nm
driver defines a system-wide maximum delete size for NVMe devices.
The default is 1GB (1073741824 bytes).
To select a different value, set the following tunable in
.Xr loader.conf 5 :
.Bd -literal -offset indent
hw.nvd.delete_max=<delete size in bytes>
.Ed
.Sh SEE ALSO
.Xr GEOM 4 ,
.Xr nvme 4 ,

View File

@ -1,5 +1,5 @@
/*-
* Copyright (C) 2012-2013 Intel Corporation
* Copyright (C) 2012-2016 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
@ -88,6 +89,19 @@ struct nvd_controller {
/* Tail queue head for struct nvd_controller entries. */
static TAILQ_HEAD(, nvd_controller) ctrlr_head;
/* Tail queue head (struct disk_list) for struct nvd_disk entries. */
static TAILQ_HEAD(disk_list, nvd_disk) disk_head;
/* hw.nvd sysctl node; parent of the driver parameters declared below. */
static SYSCTL_NODE(_hw, OID_AUTO, nvd, CTLFLAG_RD, 0, "nvd driver parameters");
/*
 * The NVMe specification does not define a maximum or optimal delete size, so
 * technically the max delete size is min(full size of the namespace, 2^32 - 1
 * LBAs). However, a single delete for a multi-TB NVMe namespace may take much
 * longer to complete than the nvme(4) I/O timeout period. So choose a sensible
 * default here that is still suitably large to minimize the number of overall
 * delete operations.
 *
 * The value is in bytes. nvd_new_disk() uses it as an upper bound when it
 * sets a disk's d_delmaxsize. It is published as hw.nvd.delete_max with
 * CTLFLAG_RDTUN, so it is meant to be set as a loader tunable (see nvd(4))
 * rather than changed on a running system.
 */
static uint64_t nvd_delete_max = (1024 * 1024 * 1024); /* 1GB */
SYSCTL_UQUAD(_hw_nvd, OID_AUTO, delete_max, CTLFLAG_RDTUN, &nvd_delete_max, 0,
"nvd maximum BIO_DELETE size in bytes");
static int nvd_modevent(module_t mod, int type, void *arg)
{
int error = 0;
@ -295,6 +309,8 @@ nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
disk->d_sectorsize = nvme_ns_get_sector_size(ns);
disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
if (disk->d_delmaxsize > nvd_delete_max)
disk->d_delmaxsize = nvd_delete_max;
disk->d_stripesize = nvme_ns_get_optimal_sector_size(ns);
if (TAILQ_EMPTY(&disk_head))