Separate the pci attachment from the rest of nvme

NVMe drives can be attached in a number of different ways. Separate out the PCI
attachment so that other attachment types, such as AHCI and the various flavors
of NVMe over Fabrics (NVMeoF), can be added alongside it.

Submitted by: cognet@
This commit is contained in:
Warner Losh 2019-08-21 22:17:55 +00:00
parent 99f13ae12c
commit f182f928db
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=351355
6 changed files with 348 additions and 303 deletions

View File

@ -2483,6 +2483,7 @@ dev/nvme/nvme_ctrlr.c optional nvme
dev/nvme/nvme_ctrlr_cmd.c optional nvme
dev/nvme/nvme_ns.c optional nvme
dev/nvme/nvme_ns_cmd.c optional nvme
dev/nvme/nvme_pci.c optional nvme pci
dev/nvme/nvme_qpair.c optional nvme
dev/nvme/nvme_sim.c optional nvme scbus
dev/nvme/nvme_sysctl.c optional nvme

View File

@ -36,9 +36,6 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include "nvme_private.h"
struct nvme_consumer {
@ -58,106 +55,7 @@ int32_t nvme_retry_count;
MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
static int nvme_probe(device_t);
static int nvme_attach(device_t);
static int nvme_detach(device_t);
static int nvme_shutdown(device_t);
static devclass_t nvme_devclass;
static device_method_t nvme_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, nvme_probe),
DEVMETHOD(device_attach, nvme_attach),
DEVMETHOD(device_detach, nvme_detach),
DEVMETHOD(device_shutdown, nvme_shutdown),
{ 0, 0 }
};
static driver_t nvme_pci_driver = {
"nvme",
nvme_pci_methods,
sizeof(struct nvme_controller),
};
DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, NULL);
MODULE_VERSION(nvme, 1);
MODULE_DEPEND(nvme, cam, 1, 1, 1);
static struct _pcsid
{
uint32_t devid;
int match_subdevice;
uint16_t subdevice;
const char *desc;
uint32_t quirks;
} pci_ids[] = {
{ 0x01118086, 0, 0, "NVMe Controller" },
{ IDT32_PCI_ID, 0, 0, "IDT NVMe Controller (32 channel)" },
{ IDT8_PCI_ID, 0, 0, "IDT NVMe Controller (8 channel)" },
{ 0x09538086, 1, 0x3702, "DC P3700 SSD" },
{ 0x09538086, 1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
{ 0x09538086, 1, 0x3704, "DC P3500 SSD [Add-in Card]" },
{ 0x09538086, 1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
{ 0x09538086, 1, 0x3709, "DC P3600 SSD [Add-in Card]" },
{ 0x09538086, 1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
{ 0x00031c58, 0, 0, "HGST SN100", QUIRK_DELAY_B4_CHK_RDY },
{ 0x00231c58, 0, 0, "WDC SN200", QUIRK_DELAY_B4_CHK_RDY },
{ 0x05401c5f, 0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
{ 0xa821144d, 0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
{ 0xa822144d, 0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
{ 0x01161179, 0, 0, "Toshiba XG5", QUIRK_DISABLE_TIMEOUT },
{ 0x00000000, 0, 0, NULL }
};
static int
nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
{
if (devid != ep->devid)
return 0;
if (!ep->match_subdevice)
return 1;
if (subdevice == ep->subdevice)
return 1;
else
return 0;
}
static int
nvme_probe (device_t device)
{
struct _pcsid *ep;
uint32_t devid;
uint16_t subdevice;
devid = pci_get_devid(device);
subdevice = pci_get_subdevice(device);
ep = pci_ids;
while (ep->devid) {
if (nvme_match(devid, subdevice, ep))
break;
++ep;
}
if (ep->desc) {
device_set_desc(device, ep->desc);
return (BUS_PROBE_DEFAULT);
}
#if defined(PCIS_STORAGE_NVM)
if (pci_get_class(device) == PCIC_STORAGE &&
pci_get_subclass(device) == PCIS_STORAGE_NVM &&
pci_get_progif(device) == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
device_set_desc(device, "Generic NVMe Device");
return (BUS_PROBE_GENERIC);
}
#endif
return (ENXIO);
}
devclass_t nvme_devclass;
static void
nvme_init(void)
@ -181,7 +79,7 @@ nvme_uninit(void)
SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
static int
int
nvme_shutdown(device_t dev)
{
struct nvme_controller *ctrlr;
@ -225,24 +123,11 @@ nvme_dump_completion(struct nvme_completion *cpl)
cpl->cid, p, sc, sct, m, dnr);
}
static int
int
nvme_attach(device_t dev)
{
struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
int status;
struct _pcsid *ep;
uint32_t devid;
uint16_t subdevice;
devid = pci_get_devid(dev);
subdevice = pci_get_subdevice(dev);
ep = pci_ids;
while (ep->devid) {
if (nvme_match(devid, subdevice, ep))
break;
++ep;
}
ctrlr->quirks = ep->quirks;
status = nvme_ctrlr_construct(ctrlr, dev);
@ -252,31 +137,7 @@ nvme_attach(device_t dev)
}
/*
* Some drives do not implement the completion timeout feature
* correctly. There's a WAR from the manufacturer to just disable it.
* The driver wouldn't respond correctly to a timeout anyway.
*/
if (ep->quirks & QUIRK_DISABLE_TIMEOUT) {
int ptr;
uint16_t devctl2;
status = pci_find_cap(dev, PCIY_EXPRESS, &ptr);
if (status) {
device_printf(dev, "Can't locate PCIe capability?");
return (status);
}
devctl2 = pci_read_config(dev, ptr + PCIER_DEVICE_CTL2, sizeof(devctl2));
devctl2 |= PCIEM_CTL2_COMP_TIMO_DISABLE;
pci_write_config(dev, ptr + PCIER_DEVICE_CTL2, devctl2, sizeof(devctl2));
}
/*
* Enable busmastering so the completion status messages can
* be busmastered back to the host.
*/
pci_enable_busmaster(dev);
/*
* Reset controller twice to ensure we do a transition from cc.en==1
* Reset controller twice to ensure we do a transition from cc.en==1
* to cc.en==0. This is because we don't really know what status
* the controller was left in when boot handed off to OS.
@ -301,13 +162,12 @@ nvme_attach(device_t dev)
return (0);
}
static int
int
nvme_detach (device_t dev)
{
struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
nvme_ctrlr_destruct(ctrlr, dev);
pci_disable_busmaster(dev);
return (0);
}

View File

@ -42,48 +42,12 @@ __FBSDID("$FreeBSD$");
#include <sys/uio.h>
#include <sys/endian.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include "nvme_private.h"
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
struct nvme_async_event_request *aer);
static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{
ctrlr->resource_id = PCIR_BAR(0);
ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
&ctrlr->resource_id, RF_ACTIVE);
if(ctrlr->resource == NULL) {
nvme_printf(ctrlr, "unable to allocate pci resource\n");
return (ENOMEM);
}
ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
/*
* The NVMe spec allows for the MSI-X table to be placed behind
* BAR 4/5, separate from the control/doorbell registers. Always
* try to map this bar, because it must be mapped prior to calling
* pci_alloc_msix(). If the table isn't behind BAR 4/5,
* bus_alloc_resource() will just return NULL which is OK.
*/
ctrlr->bar4_resource_id = PCIR_BAR(4);
ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
&ctrlr->bar4_resource_id, RF_ACTIVE);
return (0);
}
static int
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
@ -876,9 +840,8 @@ nvme_ctrlr_start(void *ctrlr_arg)
* the number of I/O queues supported, so cannot reset
* the adminq again here.
*/
if (ctrlr->is_resetting) {
if (ctrlr->is_resetting)
nvme_qpair_reset(&ctrlr->adminq);
}
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_qpair_reset(&ctrlr->ioq[i]);
@ -1004,34 +967,6 @@ nvme_ctrlr_intx_handler(void *arg)
nvme_mmio_write_4(ctrlr, intmc, 1);
}
static int
nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
{
ctrlr->msix_enabled = 0;
ctrlr->num_io_queues = 1;
ctrlr->num_cpus_per_ioq = mp_ncpus;
ctrlr->rid = 0;
ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
&ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
if (ctrlr->res == NULL) {
nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
return (ENOMEM);
}
bus_setup_intr(ctrlr->dev, ctrlr->res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
ctrlr, &ctrlr->tag);
if (ctrlr->tag == NULL) {
nvme_printf(ctrlr, "unable to setup intx handler\n");
return (ENOMEM);
}
return (0);
}
static void
nvme_pt_done(void *arg, const struct nvme_completion *cpl)
{
@ -1177,88 +1112,6 @@ static struct cdevsw nvme_ctrlr_cdevsw = {
.d_ioctl = nvme_ctrlr_ioctl
};
static void
nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
{
device_t dev;
int per_cpu_io_queues;
int min_cpus_per_ioq;
int num_vectors_requested, num_vectors_allocated;
int num_vectors_available;
dev = ctrlr->dev;
min_cpus_per_ioq = 1;
TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
if (min_cpus_per_ioq < 1) {
min_cpus_per_ioq = 1;
} else if (min_cpus_per_ioq > mp_ncpus) {
min_cpus_per_ioq = mp_ncpus;
}
per_cpu_io_queues = 1;
TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
if (per_cpu_io_queues == 0) {
min_cpus_per_ioq = mp_ncpus;
}
ctrlr->force_intx = 0;
TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
/*
* FreeBSD currently cannot allocate more than about 190 vectors at
* boot, meaning that systems with high core count and many devices
* requesting per-CPU interrupt vectors will not get their full
* allotment. So first, try to allocate as many as we may need to
* understand what is available, then immediately release them.
* Then figure out how many of those we will actually use, based on
* assigning an equal number of cores to each I/O queue.
*/
/* One vector for per core I/O queue, plus one vector for admin queue. */
num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
num_vectors_available = 0;
}
pci_release_msi(dev);
if (ctrlr->force_intx || num_vectors_available < 2) {
nvme_ctrlr_configure_intx(ctrlr);
return;
}
/*
* Do not use all vectors for I/O queues - one must be saved for the
* admin queue.
*/
ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
howmany(mp_ncpus, num_vectors_available - 1));
ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
num_vectors_requested = ctrlr->num_io_queues + 1;
num_vectors_allocated = num_vectors_requested;
/*
* Now just allocate the number of vectors we need. This should
* succeed, since we previously called pci_alloc_msix()
* successfully returning at least this many vectors, but just to
* be safe, if something goes wrong just revert to INTx.
*/
if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
nvme_ctrlr_configure_intx(ctrlr);
return;
}
if (num_vectors_allocated < num_vectors_requested) {
pci_release_msi(dev);
nvme_ctrlr_configure_intx(ctrlr);
return;
}
ctrlr->msix_enabled = 1;
}
int
nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
{
@ -1274,11 +1127,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
status = nvme_ctrlr_allocate_bar(ctrlr);
if (status != 0)
return (status);
/*
* Software emulators may set the doorbell stride to something
* other than zero, but this driver is not set up to handle that.
@ -1308,8 +1156,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
ctrlr->enable_aborts = 0;
TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
nvme_ctrlr_setup_interrupts(ctrlr);
ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
if (nvme_ctrlr_construct_admin_qpair(ctrlr) != 0)
return (ENXIO);
@ -1395,9 +1241,6 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
rman_get_rid(ctrlr->res), ctrlr->res);
if (ctrlr->msix_enabled)
pci_release_msi(dev);
if (ctrlr->bar4_resource != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY,
ctrlr->bar4_resource_id, ctrlr->bar4_resource);

333
sys/dev/nvme/nvme_pci.c Normal file
View File

@ -0,0 +1,333 @@
/*-
* Copyright (C) 2012-2016 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include "nvme_private.h"
static int nvme_pci_probe(device_t);
static int nvme_pci_attach(device_t);
static int nvme_pci_detach(device_t);
static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
/*
 * Device method table for the PCI attachment of nvme(4).  Probe, attach and
 * detach are the PCI-specific wrappers defined in this file; shutdown is
 * routed straight to nvme_shutdown(), which is declared in nvme_private.h.
 */
static device_method_t nvme_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, nvme_pci_probe),
DEVMETHOD(device_attach, nvme_pci_attach),
DEVMETHOD(device_detach, nvme_pci_detach),
DEVMETHOD(device_shutdown, nvme_shutdown),
{ 0, 0 }	/* end-of-table marker */
};
/*
 * Driver description for the PCI attachment: the driver name "nvme", the
 * nvme_pci_methods table, and a per-device softc sized to hold one
 * struct nvme_controller.
 */
static driver_t nvme_pci_driver = {
"nvme",
nvme_pci_methods,
sizeof(struct nvme_controller),
};
/* Hook the PCI attachment of nvme(4) onto the pci bus. */
DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, 0);
/*
 * The module registered above is named "nvme", so keep advertising its
 * version and its dependency on CAM under that name.  Both declarations used
 * to sit next to DRIVER_MODULE() before the PCI code was split out; without
 * them, consumers that declare a dependency on "nvme" can no longer resolve
 * it.
 */
MODULE_VERSION(nvme, 1);
MODULE_DEPEND(nvme, cam, 1, 1, 1);
/* Version tag for the PCI attachment itself. */
MODULE_VERSION(nvme_pci, 1);
static struct _pcsid
{
uint32_t devid;
int match_subdevice;
uint16_t subdevice;
const char *desc;
uint32_t quirks;
} pci_ids[] = {
{ 0x01118086, 0, 0, "NVMe Controller" },
{ IDT32_PCI_ID, 0, 0, "IDT NVMe Controller (32 channel)" },
{ IDT8_PCI_ID, 0, 0, "IDT NVMe Controller (8 channel)" },
{ 0x09538086, 1, 0x3702, "DC P3700 SSD" },
{ 0x09538086, 1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
{ 0x09538086, 1, 0x3704, "DC P3500 SSD [Add-in Card]" },
{ 0x09538086, 1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
{ 0x09538086, 1, 0x3709, "DC P3600 SSD [Add-in Card]" },
{ 0x09538086, 1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
{ 0x00031c58, 0, 0, "HGST SN100", QUIRK_DELAY_B4_CHK_RDY },
{ 0x00231c58, 0, 0, "WDC SN200", QUIRK_DELAY_B4_CHK_RDY },
{ 0x05401c5f, 0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
{ 0xa821144d, 0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
{ 0xa822144d, 0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
{ 0x00000000, 0, 0, NULL }
};
/*
 * Decide whether table entry 'ep' describes the device identified by
 * 'devid' and 'subdevice'.
 *
 * The device ID must always be equal.  The subdevice ID is compared only
 * when the entry asks for it through match_subdevice.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int
nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
{
	if (ep->devid != devid)
		return (0);

	/* Entries that ignore the subdevice match on device ID alone. */
	return (!ep->match_subdevice || ep->subdevice == subdevice);
}
/*
 * Probe method for the PCI attachment.
 *
 * The device is first looked up in pci_ids.  A hit records the entry's quirk
 * flags in the softc, sets the entry's description and returns
 * BUS_PROBE_DEFAULT.  Otherwise, when PCIS_STORAGE_NVM is defined, a device
 * whose class, subclass and programming interface identify an NVMe
 * controller is accepted with BUS_PROBE_GENERIC.  Anything else is rejected
 * with ENXIO.
 */
static int
nvme_pci_probe (device_t device)
{
	struct nvme_controller *ctrlr = DEVICE2SOFTC(device);
	const uint32_t devid = pci_get_devid(device);
	const uint16_t subdevice = pci_get_subdevice(device);
	struct _pcsid *entry;

	/* Stop at the first matching entry, or at the zero terminator. */
	for (entry = pci_ids; entry->devid != 0; entry++) {
		if (nvme_match(devid, subdevice, entry))
			break;
	}

	if (entry->devid != 0)
		ctrlr->quirks = entry->quirks;

	/* The terminator carries a NULL description. */
	if (entry->desc != NULL) {
		device_set_desc(device, entry->desc);
		return (BUS_PROBE_DEFAULT);
	}

#if defined(PCIS_STORAGE_NVM)
	if (pci_get_class(device) == PCIC_STORAGE &&
	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
	    pci_get_progif(device) == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
		device_set_desc(device, "Generic NVMe Device");
		return (BUS_PROBE_GENERIC);
	}
#endif

	return (ENXIO);
}
static int
nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
{
ctrlr->resource_id = PCIR_BAR(0);
ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
&ctrlr->resource_id, RF_ACTIVE);
if(ctrlr->resource == NULL) {
nvme_printf(ctrlr, "unable to allocate pci resource\n");
return (ENOMEM);
}
ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
/*
* The NVMe spec allows for the MSI-X table to be placed behind
* BAR 4/5, separate from the control/doorbell registers. Always
* try to map this bar, because it must be mapped prior to calling
* pci_alloc_msix(). If the table isn't behind BAR 4/5,
* bus_alloc_resource() will just return NULL which is OK.
*/
ctrlr->bar4_resource_id = PCIR_BAR(4);
ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
&ctrlr->bar4_resource_id, RF_ACTIVE);
return (0);
}
/*
 * Attach method for the PCI attachment.
 *
 * Performs the PCI-specific bring-up (BAR mapping, quirk handling, bus
 * mastering, interrupt setup) and then hands over to the bus-independent
 * nvme_attach().
 *
 * Returns the status of nvme_attach(), or the error from an earlier step.
 * On an early failure everything acquired so far is released.
 */
static int
nvme_pci_attach(device_t dev)
{
	struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
	int status;

	ctrlr->dev = dev;
	status = nvme_ctrlr_allocate_bar(ctrlr);
	if (status != 0)
		goto bad;

	/*
	 * Some drives do not implement the completion timeout feature
	 * correctly.  There's a WAR from the manufacturer to just disable it.
	 * The driver wouldn't respond correctly to a timeout anyway.
	 *
	 * This workaround was part of the attach path before the PCI code was
	 * split out; without it QUIRK_DISABLE_TIMEOUT would have no effect.
	 */
	if (ctrlr->quirks & QUIRK_DISABLE_TIMEOUT) {
		int ptr;
		uint16_t devctl2;

		status = pci_find_cap(dev, PCIY_EXPRESS, &ptr);
		if (status != 0) {
			device_printf(dev, "Can't locate PCIe capability?\n");
			goto bad;
		}
		devctl2 = pci_read_config(dev, ptr + PCIER_DEVICE_CTL2,
		    sizeof(devctl2));
		devctl2 |= PCIEM_CTL2_COMP_TIMO_DISABLE;
		pci_write_config(dev, ptr + PCIER_DEVICE_CTL2, devctl2,
		    sizeof(devctl2));
	}

	/*
	 * Enable busmastering so the completion status messages can
	 * be busmastered back to the host.
	 */
	pci_enable_busmaster(dev);
	nvme_ctrlr_setup_interrupts(ctrlr);
	return (nvme_attach(dev));

bad:
	/* Undo whatever was acquired before the failure. */
	if (ctrlr->resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->resource_id, ctrlr->resource);
	}
	if (ctrlr->bar4_resource != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY,
		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
	}
	if (ctrlr->tag)
		bus_teardown_intr(dev, ctrlr->res, ctrlr->tag);
	if (ctrlr->res)
		bus_release_resource(dev, SYS_RES_IRQ,
		    rman_get_rid(ctrlr->res), ctrlr->res);
	if (ctrlr->msix_enabled)
		pci_release_msi(dev);
	return (status);
}
/*
 * Detach method for the PCI attachment.
 *
 * The bus-independent teardown must run first: nvme_detach() destroys the
 * controller, which is where the interrupt resources are released.  Only
 * after that can the MSI-X vectors be given back, and bus mastering is
 * turned off last so it is still available while the controller is being
 * torn down.  This is the order the driver used before the PCI code was
 * split out.
 *
 * Returns the status of nvme_detach().
 */
static int
nvme_pci_detach(device_t dev)
{
	struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
	int rv;

	rv = nvme_detach(dev);
	if (ctrlr->msix_enabled)
		pci_release_msi(dev);
	pci_disable_busmaster(dev);
	return (rv);
}
static int
nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
{
ctrlr->msix_enabled = 0;
ctrlr->num_io_queues = 1;
ctrlr->num_cpus_per_ioq = mp_ncpus;
ctrlr->rid = 0;
ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
&ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
if (ctrlr->res == NULL) {
nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
return (ENOMEM);
}
bus_setup_intr(ctrlr->dev, ctrlr->res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
ctrlr, &ctrlr->tag);
if (ctrlr->tag == NULL) {
nvme_printf(ctrlr, "unable to setup intx handler\n");
return (ENOMEM);
}
return (0);
}
/*
 * Choose and set up the controller's interrupt scheme.
 *
 * Tunables consulted:
 *   hw.nvme.min_cpus_per_ioq   - lower bound on CPUs sharing one I/O queue,
 *                                clamped to [1, mp_ncpus]
 *   hw.nvme.per_cpu_io_queues  - 0 forces every CPU onto a single I/O queue
 *   hw.nvme.force_intx         - non-zero forces the legacy interrupt
 *
 * MSI-X is used when at least two vectors are available (one for the admin
 * queue plus at least one I/O queue).  In every other case the controller
 * falls back to nvme_ctrlr_configure_intx().
 */
static void
nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
{
	device_t dev = ctrlr->dev;
	int min_cpus_per_ioq = 1;
	int per_cpu_io_queues = 1;
	int vectors_avail, vectors_wanted, vectors_got;

	TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
	if (min_cpus_per_ioq < 1)
		min_cpus_per_ioq = 1;
	else if (min_cpus_per_ioq > mp_ncpus)
		min_cpus_per_ioq = mp_ncpus;

	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
	if (per_cpu_io_queues == 0)
		min_cpus_per_ioq = mp_ncpus;

	ctrlr->force_intx = 0;
	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);

	/*
	 * FreeBSD currently cannot allocate more than about 190 vectors at
	 * boot, so systems with many cores and many devices asking for
	 * per-CPU vectors will not get their full allotment.  Do a trial
	 * allocation of as many vectors as could possibly be needed to learn
	 * what is really available, release it again right away, and then
	 * size the I/O queues so each gets an equal share of the cores.
	 */

	/* One vector per per-core I/O queue, plus one for the admin queue. */
	vectors_avail = min(pci_msix_count(dev), mp_ncpus + 1);
	if (pci_alloc_msix(dev, &vectors_avail) != 0)
		vectors_avail = 0;
	pci_release_msi(dev);

	if (ctrlr->force_intx || vectors_avail < 2) {
		nvme_ctrlr_configure_intx(ctrlr);
		return;
	}

	/*
	 * One vector is reserved for the admin queue, so only the remaining
	 * ones can be spread over the I/O queues.
	 */
	ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
	    howmany(mp_ncpus, vectors_avail - 1));
	ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);

	vectors_wanted = ctrlr->num_io_queues + 1;
	vectors_got = vectors_wanted;

	/*
	 * Allocate exactly what is needed.  The trial allocation above
	 * already returned at least this many vectors, so this should
	 * succeed; to be safe, fall back to INTx if it fails or comes up
	 * short.
	 */
	if (pci_alloc_msix(dev, &vectors_got) != 0) {
		nvme_ctrlr_configure_intx(ctrlr);
		return;
	}

	if (vectors_got < vectors_wanted) {
		pci_release_msi(dev);
		nvme_ctrlr_configure_intx(ctrlr);
		return;
	}

	ctrlr->msix_enabled = 1;
}

View File

@ -37,6 +37,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/rman.h>
#include <sys/systm.h>
@ -122,6 +123,8 @@ struct nvme_completion_poll_status {
int done;
};
extern devclass_t nvme_devclass;
#define NVME_REQUEST_VADDR 1
#define NVME_REQUEST_NULL 2 /* For requests with no payload. */
#define NVME_REQUEST_UIO 3
@ -439,6 +442,10 @@ void nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr);
void nvme_dump_command(struct nvme_command *cmd);
void nvme_dump_completion(struct nvme_completion *cpl);
int nvme_attach(device_t dev);
int nvme_shutdown(device_t dev);
int nvme_detach(device_t dev);
static __inline void
nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
{

View File

@ -9,6 +9,7 @@ SRCS = nvme.c \
nvme_ctrlr_cmd.c \
nvme_ns.c \
nvme_ns_cmd.c \
nvme_pci.c \
nvme_qpair.c \
nvme_sim.c \
nvme_sysctl.c \