nvme: Handle SIGBUS generated when a device is hot removed

Catch SIGBUS and handle it by remapping new memory into the
location where the BAR previously was.

Change-Id: Ie8d00a60a0bbe7f7ec57a5c39c0a63c5d9443206
Signed-off-by: Cunyin Chang <cunyin.chang@intel.com>
This commit is contained in:
Cunyin Chang 2016-11-15 10:33:24 +08:00 committed by Ben Walker
parent 56765aa39f
commit 0e807fae27

View File

@ -35,6 +35,11 @@
* NVMe over PCIe transport * NVMe over PCIe transport
*/ */
#include <sys/mman.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include "nvme_internal.h" #include "nvme_internal.h"
#define NVME_ADMIN_ENTRIES (128) #define NVME_ADMIN_ENTRIES (128)
@ -80,6 +85,9 @@ struct nvme_pcie_ctrlr {
/** NVMe MMIO register space */ /** NVMe MMIO register space */
volatile struct spdk_nvme_registers *regs; volatile struct spdk_nvme_registers *regs;
/** NVMe MMIO register size */
uint64_t regs_size;
/* BAR mapping address which contains controller memory buffer */ /* BAR mapping address which contains controller memory buffer */
void *cmb_bar_virt_addr; void *cmb_bar_virt_addr;
@ -97,6 +105,9 @@ struct nvme_pcie_ctrlr {
/* Opaque handle to associated PCI device. */ /* Opaque handle to associated PCI device. */
struct spdk_pci_device *devhandle; struct spdk_pci_device *devhandle;
/* Flag to indicate the MMIO register has been remapped */
bool is_remapped;
}; };
struct nvme_tracker { struct nvme_tracker {
@ -170,6 +181,49 @@ struct nvme_pcie_qpair {
uint64_t cpl_bus_addr; uint64_t cpl_bus_addr;
}; };
__thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL;
static volatile uint16_t g_signal_lock;
static bool g_sigset = false;
static void
nvme_sigbus_fault_sighandler(int signum, siginfo_t *info, void *ctx)
{
void *map_address;
if (!__sync_bool_compare_and_swap(&g_signal_lock, 0, 1)) {
return;
}
if (g_thread_mmio_ctrlr) {
if (!g_thread_mmio_ctrlr->is_remapped) {
map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
if (map_address == MAP_FAILED) {
SPDK_ERRLOG("mmap failed\n");
g_signal_lock = 0;
return;
}
memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers));
g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address;
g_thread_mmio_ctrlr->is_remapped = true;
}
}
g_signal_lock = 0;
return;
}
static void
nvme_pcie_ctrlr_setup_signal(void)
{
struct sigaction sa;
sa.sa_sigaction = nvme_sigbus_fault_sighandler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sigaction(SIGBUS, &sa, NULL);
}
static inline struct nvme_pcie_ctrlr * static inline struct nvme_pcie_ctrlr *
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr) nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{ {
@ -206,34 +260,58 @@ nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset)
int int
nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value)
{ {
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
assert(offset <= sizeof(struct spdk_nvme_registers) - 4); assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
g_thread_mmio_ctrlr = pctrlr;
spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value); spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value);
g_thread_mmio_ctrlr = NULL;
return 0; return 0;
} }
int int
nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value)
{ {
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
assert(offset <= sizeof(struct spdk_nvme_registers) - 8); assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
g_thread_mmio_ctrlr = pctrlr;
spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value); spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value);
g_thread_mmio_ctrlr = NULL;
return 0; return 0;
} }
int int
nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value)
{ {
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
assert(offset <= sizeof(struct spdk_nvme_registers) - 4); assert(offset <= sizeof(struct spdk_nvme_registers) - 4);
assert(value != NULL); assert(value != NULL);
g_thread_mmio_ctrlr = pctrlr;
*value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset)); *value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset));
g_thread_mmio_ctrlr = NULL;
if (~(*value) == 0) {
return -1;
}
return 0; return 0;
} }
int int
nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value)
{ {
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
assert(offset <= sizeof(struct spdk_nvme_registers) - 8); assert(offset <= sizeof(struct spdk_nvme_registers) - 8);
assert(value != NULL); assert(value != NULL);
g_thread_mmio_ctrlr = pctrlr;
*value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset)); *value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset));
g_thread_mmio_ctrlr = NULL;
if (~(*value) == 0) {
return -1;
}
return 0; return 0;
} }
@ -391,6 +469,7 @@ nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr)
return -1; return -1;
} }
pctrlr->regs_size = size;
nvme_pcie_ctrlr_map_cmb(pctrlr); nvme_pcie_ctrlr_map_cmb(pctrlr);
return 0; return 0;
@ -409,6 +488,9 @@ nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr)
} }
if (addr) { if (addr) {
/* NOTE: addr may have been remapped here. We're relying on DPDK to call
* munmap internally.
*/
rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr); rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr);
} }
return rc; return rc;
@ -486,6 +568,7 @@ struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(enum spdk_nvme_transport trans
return NULL; return NULL;
} }
pctrlr->is_remapped = false;
pctrlr->ctrlr.transport = SPDK_NVME_TRANSPORT_PCIE; pctrlr->ctrlr.transport = SPDK_NVME_TRANSPORT_PCIE;
pctrlr->devhandle = devhandle; pctrlr->devhandle = devhandle;
@ -531,6 +614,11 @@ struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(enum spdk_nvme_transport trans
return NULL; return NULL;
} }
if (g_sigset != true) {
nvme_pcie_ctrlr_setup_signal();
g_sigset = true;
}
return &pctrlr->ctrlr; return &pctrlr->ctrlr;
} }
@ -812,6 +900,7 @@ nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracke
{ {
struct nvme_request *req; struct nvme_request *req;
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
req = tr->req; req = tr->req;
pqpair->tr[tr->cid].active = true; pqpair->tr[tr->cid].active = true;
@ -824,7 +913,9 @@ nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracke
} }
spdk_wmb(); spdk_wmb();
g_thread_mmio_ctrlr = pctrlr;
spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail); spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail);
g_thread_mmio_ctrlr = NULL;
} }
static void static void
@ -1602,6 +1693,7 @@ int32_t
nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions) nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{ {
struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
struct nvme_tracker *tr; struct nvme_tracker *tr;
struct spdk_nvme_cpl *cpl; struct spdk_nvme_cpl *cpl;
uint32_t num_completions = 0; uint32_t num_completions = 0;
@ -1658,7 +1750,9 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
} }
if (num_completions > 0) { if (num_completions > 0) {
g_thread_mmio_ctrlr = pctrlr;
spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head); spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head);
g_thread_mmio_ctrlr = NULL;
} }
/* Before returning, complete any pending admin request. */ /* Before returning, complete any pending admin request. */