nvme: perform resets in parallel during attach

When multiple NVMe controllers are being initialized during
spdk_nvme_probe(), we can overlap the hardware resets of all controllers
to improve startup time.

Rewrite the initialization sequence as a polling function,
nvme_ctrlr_process_init(), that maintains a per-controller state machine
to determine which initialization step is underway.  Each step also has
a timeout to ensure the process will terminate if the hardware is hung.

Currently, only the hardware reset (toggling of CC.EN and waiting for
CSTS.RDY) is done in parallel; the rest of initialization is done
sequentially in nvme_ctrlr_start() as before.  These steps could also be
parallelized in a similar framework if measurements indicate that they
take a significant amount of time.

Change-Id: I02ce5863f1b5c13ad65ccd8be571085528d98bd5
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Daniel Verkamp 2016-02-23 16:36:13 -07:00
parent a62b194f1b
commit 20abbe8abe
6 changed files with 216 additions and 68 deletions

View File

@ -292,7 +292,7 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
{
int rc, start_rc;
struct nvme_enum_ctx enum_ctx;
struct spdk_nvme_ctrlr *ctrlr;
struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
nvme_mutex_lock(&g_nvme_driver.lock);
@ -305,38 +305,48 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
* but maintain the value of rc to signal errors when we return.
*/
/* TODO: This could be reworked to start all the controllers in parallel. */
/* Initialize all new controllers in the init_ctrlrs list in parallel. */
while (!TAILQ_EMPTY(&g_nvme_driver.init_ctrlrs)) {
/* Remove ctrlr from init_ctrlrs and attempt to start it */
ctrlr = TAILQ_FIRST(&g_nvme_driver.init_ctrlrs);
TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
/*
* Drop the driver lock while calling nvme_ctrlr_start() since it needs to acquire
* the driver lock internally.
*
* TODO: Rethink the locking - maybe reset should take the lock so that start() and
* the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
* can assume it is held.
*/
nvme_mutex_unlock(&g_nvme_driver.lock);
start_rc = nvme_ctrlr_start(ctrlr);
nvme_mutex_lock(&g_nvme_driver.lock);
if (start_rc == 0) {
TAILQ_INSERT_TAIL(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);
/*
* Unlock while calling attach_cb() so the user can call other functions
* that may take the driver lock, like nvme_detach().
TAILQ_FOREACH_SAFE(ctrlr, &g_nvme_driver.init_ctrlrs, tailq, ctrlr_tmp) {
/* Drop the driver lock while calling nvme_ctrlr_process_init()
* since it needs to acquire the driver lock internally when calling
* nvme_ctrlr_start().
*
* TODO: Rethink the locking - maybe reset should take the lock so that start() and
* the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
* can assume it is held.
*/
nvme_mutex_unlock(&g_nvme_driver.lock);
attach_cb(cb_ctx, ctrlr->devhandle, ctrlr);
start_rc = nvme_ctrlr_process_init(ctrlr);
nvme_mutex_lock(&g_nvme_driver.lock);
} else {
nvme_ctrlr_destruct(ctrlr);
nvme_free(ctrlr);
rc = -1;
if (start_rc) {
/* Controller failed to initialize. */
TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
nvme_ctrlr_destruct(ctrlr);
nvme_free(ctrlr);
rc = -1;
break;
}
if (ctrlr->state == NVME_CTRLR_STATE_READY) {
/*
* Controller has been initialized.
* Move it to the attached_ctrlrs list.
*/
TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
TAILQ_INSERT_TAIL(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);
/*
* Unlock while calling attach_cb() so the user can call other functions
* that may take the driver lock, like nvme_detach().
*/
nvme_mutex_unlock(&g_nvme_driver.lock);
attach_cb(cb_ctx, ctrlr->devhandle, ctrlr);
nvme_mutex_lock(&g_nvme_driver.lock);
break;
}
}
}

View File

@ -326,18 +326,13 @@ static int
nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
union spdk_nvme_cc_register cc;
union spdk_nvme_csts_register csts;
union spdk_nvme_aqa_register aqa;
cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
csts.raw = nvme_mmio_read_4(ctrlr, csts);
if (cc.bits.en == 1) {
if (csts.bits.rdy == 1) {
return 0;
} else {
return nvme_ctrlr_wait_for_ready(ctrlr, 1);
}
if (cc.bits.en != 0) {
nvme_printf(ctrlr, "%s called with CC.EN = 1\n", __func__);
return EINVAL;
}
nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
@ -361,41 +356,26 @@ nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
return nvme_ctrlr_wait_for_ready(ctrlr, 1);
return 0;
}
static int
nvme_ctrlr_hw_reset(struct spdk_nvme_ctrlr *ctrlr)
static void
nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
uint64_t timeout_in_ms)
{
uint32_t i;
int rc;
union spdk_nvme_cc_register cc;
cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
if (cc.bits.en) {
nvme_qpair_disable(&ctrlr->adminq);
for (i = 0; i < ctrlr->num_io_queues; i++) {
nvme_qpair_disable(&ctrlr->ioq[i]);
}
ctrlr->state = state;
if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
} else {
/*
* The controller was already disabled. We will assume that nothing
* has been changed since cc.en was set to 0,
* meaning that we don't need to do an extra reset, and we can just
* re-enable the controller.
*/
ctrlr->state_timeout_tsc = nvme_get_tsc() + (timeout_in_ms * nvme_get_tsc_hz()) / 1000;
}
nvme_ctrlr_disable(ctrlr);
rc = nvme_ctrlr_enable(ctrlr);
return rc;
}
int
spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
{
int rc;
int rc = 0;
uint32_t i;
nvme_mutex_lock(&ctrlr->ctrlr_lock);
@ -412,10 +392,23 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
ctrlr->is_resetting = true;
nvme_printf(ctrlr, "resetting controller\n");
/* nvme_ctrlr_start() issues a reset as its first step */
rc = nvme_ctrlr_start(ctrlr);
if (rc) {
nvme_ctrlr_fail(ctrlr);
/* Disable all queues before disabling the controller hardware. */
nvme_qpair_disable(&ctrlr->adminq);
for (i = 0; i < ctrlr->num_io_queues; i++) {
nvme_qpair_disable(&ctrlr->ioq[i]);
}
/* Set the state back to INIT to cause a full hardware reset. */
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
while (ctrlr->state != NVME_CTRLR_STATE_READY) {
if (nvme_ctrlr_process_init(ctrlr) != 0) {
nvme_printf(ctrlr, "%s: controller reinitialization failed\n", __func__);
nvme_ctrlr_fail(ctrlr);
rc = -1;
break;
}
}
ctrlr->is_resetting = false;
@ -699,13 +692,107 @@ nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
return 0;
}
/**
* This function will be called repeatedly during initialization until the controller is ready.
*/
int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
if (nvme_ctrlr_hw_reset(ctrlr) != 0) {
union spdk_nvme_cc_register cc;
union spdk_nvme_csts_register csts;
union spdk_nvme_cap_lo_register cap_lo;
uint32_t ready_timeout_in_ms;
int rc;
cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
csts.raw = nvme_mmio_read_4(ctrlr, csts);
cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);
ready_timeout_in_ms = 500 * cap_lo.bits.to;
/*
* Check if the current initialization step is done or has timed out.
*/
switch (ctrlr->state) {
case NVME_CTRLR_STATE_INIT:
/* Begin the hardware initialization by making sure the controller is disabled. */
if (cc.bits.en) {
/*
* Controller is currently enabled. We need to disable it to cause a reset.
*
* If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
* Wait for the ready bit to be 1 before disabling the controller.
*/
if (csts.bits.rdy == 0) {
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
return 0;
}
/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
cc.bits.en = 0;
nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
return 0;
} else {
/*
* Controller is currently disabled. We can jump straight to enabling it.
*/
nvme_ctrlr_enable(ctrlr);
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
return 0;
}
break;
case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
if (csts.bits.rdy == 1) {
/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
cc.bits.en = 0;
nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
return 0;
}
break;
case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
if (csts.bits.rdy == 0) {
/* CC.EN = 0 && CSTS.RDY = 0, so we can enable the controller now. */
nvme_ctrlr_enable(ctrlr);
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
return 0;
}
break;
case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
if (csts.bits.rdy == 1) {
/*
* The controller has been enabled.
* Perform the rest of initialization in nvme_ctrlr_start() serially.
*/
rc = nvme_ctrlr_start(ctrlr);
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
return rc;
}
break;
default:
nvme_assert(0, ("unhandled ctrlr state %d\n", ctrlr->state));
nvme_ctrlr_fail(ctrlr);
return -1;
}
if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
nvme_get_tsc() > ctrlr->state_timeout_tsc) {
nvme_printf(ctrlr, "Initialization timed out in state %d\n", ctrlr->state);
nvme_ctrlr_fail(ctrlr);
return -1;
}
return 0;
}
int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{
nvme_qpair_reset(&ctrlr->adminq);
nvme_qpair_enable(&ctrlr->adminq);
@ -771,6 +858,7 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
int status;
int rc;
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
ctrlr->devhandle = devhandle;
status = nvme_ctrlr_allocate_bars(ctrlr);

View File

@ -287,6 +287,38 @@ struct spdk_nvme_ns {
uint16_t flags;
};
/**
* State of struct spdk_nvme_ctrlr (in particular, during initialization).
*/
enum nvme_ctrlr_state {
/**
* Controller has not been initialized yet.
*/
NVME_CTRLR_STATE_INIT,
/**
* Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
*/
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
/**
* Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
*/
NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
/**
* Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
*/
NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
/**
* Controller initialization has completed and the controller is ready.
*/
NVME_CTRLR_STATE_READY
};
#define NVME_TIMEOUT_INFINITE UINT64_MAX
/*
* One of these per allocated PCI device.
*/
@ -310,6 +342,9 @@ struct spdk_nvme_ctrlr {
/* Cold data (not accessed in normal I/O path) is after this point. */
enum nvme_ctrlr_state state;
uint64_t state_timeout_tsc;
TAILQ_ENTRY(spdk_nvme_ctrlr) tailq;
/** All the log pages supported */
@ -433,6 +468,7 @@ void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl);
int nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle);
void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,

View File

@ -57,6 +57,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
}
/* Unit-test stub: report successful initialization without touching hardware. */
int
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	(void)ctrlr; /* unused in the stub */
	return 0;
}
int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{

View File

@ -47,6 +47,8 @@ static uint16_t g_pci_subdevice_id;
char outbuf[OUTBUF_SIZE];
uint64_t g_ut_tsc = 0;
__thread int nvme_thread_ioq_index = -1;
uint16_t

View File

@ -55,6 +55,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
}
/* Stub for unit tests: controller init always succeeds immediately. */
int
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	(void)ctrlr; /* no hardware in the test environment */
	return 0;
}
int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{