freebsd-skq/sys/dev/nvme/nvme_ns.c
ken 5591de079d Change the way that unmapped I/O capability is advertised.
The previous method was to set the D_UNMAPPED_IO flag in the cdevsw
for the driver.  The problem with this is that in many cases (e.g.
sa(4)) there may be some instances of the driver that can handle
unmapped I/O and some that can't.  The isp(4) driver can handle
unmapped I/O, but the esp(4) driver currently cannot.  The cdevsw
is shared among all driver instances.

So instead of setting a flag on the cdevsw, set a flag on the cdev.
This allows drivers to indicate support for unmapped I/O on a
per-instance basis.

sys/conf.h:	Remove the D_UNMAPPED_IO cdevsw flag and replace it
		with an SI_UNMAPPED cdev flag.

kern_physio.c:	Look at the cdev SI_UNMAPPED flag to determine
		whether or not a particular driver can handle
		unmapped I/O.

geom_dev.c:	Set the SI_UNMAPPED flag for all GEOM cdevs.
		Since GEOM will create a temporary mapping when
		needed, setting SI_UNMAPPED unconditionally will
		work.

		Remove the D_UNMAPPED_IO flag.

nvme_ns.c:	Set the SI_UNMAPPED flag on cdevs created here
		if NVME_UNMAPPED_BIO_SUPPORT is enabled.

vfs_aio.c:	In aio_qphysio(), check the SI_UNMAPPED flag on a
		cdev instead of the D_UNMAPPED_IO flag on the cdevsw.

sys/param.h:	Bump __FreeBSD_version to 1000045 for the switch from
		setting the D_UNMAPPED_IO flag in the cdevsw to setting
		SI_UNMAPPED in the cdev.

Reviewed by:	kib, jimharris
MFC after:	1 week
Sponsored by:	Spectra Logic
2013-08-15 22:52:39 +00:00

363 lines
8.4 KiB
C

/*-
* Copyright (C) 2012-2013 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <dev/pci/pcivar.h>
#include "nvme_private.h"
static int
nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
struct thread *td)
{
struct nvme_namespace *ns;
struct nvme_controller *ctrlr;
struct nvme_pt_command *pt;
ns = cdev->si_drv1;
ctrlr = ns->ctrlr;
switch (cmd) {
case NVME_IO_TEST:
case NVME_BIO_TEST:
nvme_ns_test(ns, cmd, arg);
break;
case NVME_PASSTHROUGH_CMD:
pt = (struct nvme_pt_command *)arg;
return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id,
1 /* is_user_buffer */, 0 /* is_admin_cmd */));
case DIOCGMEDIASIZE:
*(off_t *)arg = (off_t)nvme_ns_get_size(ns);
break;
case DIOCGSECTORSIZE:
*(u_int *)arg = nvme_ns_get_sector_size(ns);
break;
default:
return (ENOTTY);
}
return (0);
}
static int
nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused,
struct thread *td)
{
int error = 0;
if (flags & FWRITE)
error = securelevel_gt(td->td_ucred, 0);
return (error);
}
static int
nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
struct thread *td)
{
return (0);
}
static void
nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl)
{
struct bio *bp = arg;
/*
* TODO: add more extensive translation of NVMe status codes
* to different bio error codes (i.e. EIO, EINVAL, etc.)
*/
if (nvme_completion_is_error(cpl)) {
bp->bio_error = EIO;
bp->bio_flags |= BIO_ERROR;
bp->bio_resid = bp->bio_bcount;
} else
bp->bio_resid = 0;
biodone(bp);
}
static void
nvme_ns_strategy(struct bio *bp)
{
struct nvme_namespace *ns;
int err;
ns = bp->bio_dev->si_drv1;
err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done);
if (err) {
bp->bio_error = err;
bp->bio_flags |= BIO_ERROR;
bp->bio_resid = bp->bio_bcount;
biodone(bp);
}
}
static struct cdevsw nvme_ns_cdevsw = {
.d_version = D_VERSION,
.d_flags = D_DISK,
.d_read = physread,
.d_write = physwrite,
.d_open = nvme_ns_open,
.d_close = nvme_ns_close,
.d_strategy = nvme_ns_strategy,
.d_ioctl = nvme_ns_ioctl
};
uint32_t
nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns)
{
return ns->ctrlr->max_xfer_size;
}
uint32_t
nvme_ns_get_sector_size(struct nvme_namespace *ns)
{
return (1 << ns->data.lbaf[ns->data.flbas.format].lbads);
}
uint64_t
nvme_ns_get_num_sectors(struct nvme_namespace *ns)
{
return (ns->data.nsze);
}
uint64_t
nvme_ns_get_size(struct nvme_namespace *ns)
{
return (nvme_ns_get_num_sectors(ns) * nvme_ns_get_sector_size(ns));
}
uint32_t
nvme_ns_get_flags(struct nvme_namespace *ns)
{
return (ns->flags);
}
const char *
nvme_ns_get_serial_number(struct nvme_namespace *ns)
{
return ((const char *)ns->ctrlr->cdata.sn);
}
const char *
nvme_ns_get_model_number(struct nvme_namespace *ns)
{
return ((const char *)ns->ctrlr->cdata.mn);
}
const struct nvme_namespace_data *
nvme_ns_get_data(struct nvme_namespace *ns)
{
return (&ns->data);
}
static void
nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
{
struct bio *bp = arg;
nvme_cb_fn_t bp_cb_fn;
bp_cb_fn = bp->bio_driver1;
if (bp->bio_driver2)
free(bp->bio_driver2, M_NVME);
bp_cb_fn(bp, status);
}
int
nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
nvme_cb_fn_t cb_fn)
{
struct nvme_dsm_range *dsm_range;
int err;
bp->bio_driver1 = cb_fn;
switch (bp->bio_cmd) {
case BIO_READ:
err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
break;
case BIO_WRITE:
err = nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp);
break;
case BIO_FLUSH:
err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp);
break;
case BIO_DELETE:
dsm_range =
malloc(sizeof(struct nvme_dsm_range), M_NVME,
M_ZERO | M_WAITOK);
dsm_range->length =
bp->bio_bcount/nvme_ns_get_sector_size(ns);
dsm_range->starting_lba =
bp->bio_offset/nvme_ns_get_sector_size(ns);
bp->bio_driver2 = dsm_range;
err = nvme_ns_cmd_deallocate(ns, dsm_range, 1,
nvme_ns_bio_done, bp);
if (err != 0)
free(dsm_range, M_NVME);
break;
default:
err = EIO;
break;
}
return (err);
}
#ifdef CHATHAM2
static void
nvme_ns_populate_chatham_data(struct nvme_namespace *ns)
{
struct nvme_controller *ctrlr;
struct nvme_namespace_data *nsdata;
ctrlr = ns->ctrlr;
nsdata = &ns->data;
nsdata->nsze = ctrlr->chatham_lbas;
nsdata->ncap = ctrlr->chatham_lbas;
nsdata->nuse = ctrlr->chatham_lbas;
/* Chatham2 doesn't support thin provisioning. */
nsdata->nsfeat.thin_prov = 0;
/* Set LBA size to 512 bytes. */
nsdata->lbaf[0].lbads = 9;
}
#endif /* CHATHAM2 */
int
nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
struct nvme_controller *ctrlr)
{
struct nvme_completion_poll_status status;
ns->ctrlr = ctrlr;
ns->id = id;
/*
* Namespaces are reconstructed after a controller reset, so check
* to make sure we only call mtx_init once on each mtx.
*
* TODO: Move this somewhere where it gets called at controller
* construction time, which is not invoked as part of each
* controller reset.
*/
if (!mtx_initialized(&ns->lock))
mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF);
#ifdef CHATHAM2
if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
nvme_ns_populate_chatham_data(ns);
else {
#endif
status.done = FALSE;
nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data,
nvme_completion_poll_cb, &status);
while (status.done == FALSE)
DELAY(5);
if (nvme_completion_is_error(&status.cpl)) {
nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
return (ENXIO);
}
#ifdef CHATHAM2
}
#endif
/*
* Note: format is a 0-based value, so > is appropriate here,
* not >=.
*/
if (ns->data.flbas.format > ns->data.nlbaf) {
printf("lba format %d exceeds number supported (%d)\n",
ns->data.flbas.format, ns->data.nlbaf+1);
return (1);
}
if (ctrlr->cdata.oncs.dsm)
ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;
if (ctrlr->cdata.vwc.present)
ns->flags |= NVME_NS_FLUSH_SUPPORTED;
/*
* cdev may have already been created, if we are reconstructing the
* namespace after a controller-level reset.
*/
if (ns->cdev != NULL)
return (0);
/*
* MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
* be destroyed. This avoids refcounting on the cdev object.
* That should be OK case here, as long as we're not supporting PCIe
* surprise removal nor namespace deletion.
*/
#ifdef MAKEDEV_ETERNAL_KLD
ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, 0,
NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
device_get_unit(ctrlr->dev), ns->id);
#else
ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, 0,
NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
device_get_unit(ctrlr->dev), ns->id);
#endif
#ifdef NVME_UNMAPPED_BIO_SUPPORT
ns->cdev->si_flags |= SI_UNMAPPED;
#endif
if (ns->cdev != NULL)
ns->cdev->si_drv1 = ns;
return (0);
}
void nvme_ns_destruct(struct nvme_namespace *ns)
{
if (ns->cdev != NULL)
destroy_dev(ns->cdev);
}