bhyve nvme: Add AEN support to NVMe emulation

Add Asynchronous Event Notification (AEN) infrastructure to the NVMe
emulation. A dedicated AEN thread pairs posted events with outstanding
Asynchronous Event Requests (AERs) and completes them on the Admin
Completion Queue.
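
As an illustrative sketch (the function and constants are from this
change, but the call site is hypothetical), a producer inside the
emulation posts an event and lets the AEN thread pair it with a
guest-issued AER:

	/* Hypothetical call site: report a Namespace Attribute change */
	pci_nvme_aen_post(sc, PCI_NVME_AE_TYPE_NOTICE,
	    PCI_NVME_AE_INFO_NS_ATTR_CHANGED);

Posting the same event type again before delivery returns EALREADY;
events are coalesced per type until an outstanding AER completes them.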

Reviewed by:	imp, grehan
MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D32952
Author:	Chuck Tuffli
Date:	2021-12-14 23:16:49 -08:00
Commit:	9f1fa1a461 (parent: 31b35400cf)


@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stddef.h>
@@ -250,11 +251,39 @@ struct nvme_feature_obj {
#define NVME_FID_MAX (NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1)
typedef enum {
PCI_NVME_AE_TYPE_ERROR = 0,
PCI_NVME_AE_TYPE_SMART,
PCI_NVME_AE_TYPE_NOTICE,
PCI_NVME_AE_TYPE_IO_CMD = 6,
PCI_NVME_AE_TYPE_VENDOR = 7,
PCI_NVME_AE_TYPE_MAX /* Must be last */
} pci_nvme_async_type;
/* Asynchronous Event Requests */
struct pci_nvme_aer {
STAILQ_ENTRY(pci_nvme_aer) link;
uint16_t cid; /* Command ID of the submitted AER */
};
typedef enum {
PCI_NVME_AE_INFO_NS_ATTR_CHANGED = 0,
PCI_NVME_AE_INFO_FW_ACTIVATION,
PCI_NVME_AE_INFO_TELEMETRY_CHANGE,
PCI_NVME_AE_INFO_ANA_CHANGE,
PCI_NVME_AE_INFO_PREDICT_LATENCY_CHANGE,
PCI_NVME_AE_INFO_LBA_STATUS_ALERT,
PCI_NVME_AE_INFO_ENDURANCE_GROUP_CHANGE,
PCI_NVME_AE_INFO_MAX,
} pci_nvme_async_info;
/*
 * Asynchronous Event Notifications: one slot per event type. An event
 * remains posted, and later duplicates are coalesced, until an AER
 * completes it.
 */
struct pci_nvme_aen {
pci_nvme_async_type atype;
uint32_t event_data;
bool posted;
};
struct pci_nvme_softc {
struct pci_devinst *nsc_pi;
@@ -302,10 +331,21 @@ struct pci_nvme_softc {
uint32_t write_dunits_remainder;
STAILQ_HEAD(, pci_nvme_aer) aer_list;
pthread_mutex_t aer_mtx;
uint32_t aer_count;
struct pci_nvme_aen aen[PCI_NVME_AE_TYPE_MAX];
pthread_t aen_tid;
pthread_mutex_t aen_mtx;
pthread_cond_t aen_cond;
};
static void pci_nvme_cq_update(struct pci_nvme_softc *sc,
struct nvme_completion_queue *cq,
uint32_t cdw0,
uint16_t cid,
uint16_t sqid,
uint16_t status);
static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *);
static void pci_nvme_release_ioreq(struct pci_nvme_softc *, struct pci_nvme_ioreq *);
static void pci_nvme_io_done(struct blockif_req *, int);
@@ -360,6 +400,8 @@ static void nvme_feature_iv_config(struct pci_nvme_softc *,
struct nvme_command *,
struct nvme_completion *);
static void *aen_thr(void *arg);
static __inline void
cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
{
@@ -623,6 +665,8 @@ pci_nvme_init_features(struct pci_nvme_softc *sc)
sc->feat[NVME_FEAT_NUMBER_OF_QUEUES].set = nvme_feature_num_queues;
sc->feat[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION].set =
nvme_feature_iv_config;
	/*
	 * Enable all AENs by default: SMART / Health critical warnings
	 * (bits 4:0), Namespace Attribute Notices (bit 8), and Firmware
	 * Activation Notices (bit 9).
	 */
	sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11 = 0x31f;
sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG].get =
nvme_feature_invalid_cb;
sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW].get =
@@ -630,32 +674,42 @@ pci_nvme_init_features(struct pci_nvme_softc *sc)
}
static void
-pci_nvme_aer_init(struct pci_nvme_softc *sc)
+pci_nvme_aer_reset(struct pci_nvme_softc *sc)
{
STAILQ_INIT(&sc->aer_list);
sc->aer_count = 0;
}
static void
pci_nvme_aer_init(struct pci_nvme_softc *sc)
{
pthread_mutex_init(&sc->aer_mtx, NULL);
pci_nvme_aer_reset(sc);
}
static void
pci_nvme_aer_destroy(struct pci_nvme_softc *sc)
{
struct pci_nvme_aer *aer = NULL;
pthread_mutex_lock(&sc->aer_mtx);
while (!STAILQ_EMPTY(&sc->aer_list)) {
aer = STAILQ_FIRST(&sc->aer_list);
STAILQ_REMOVE_HEAD(&sc->aer_list, link);
free(aer);
}
pthread_mutex_unlock(&sc->aer_mtx);
-	pci_nvme_aer_init(sc);
+	pci_nvme_aer_reset(sc);
}
static bool
pci_nvme_aer_available(struct pci_nvme_softc *sc)
{
-	return (!STAILQ_EMPTY(&sc->aer_list));
+	return (sc->aer_count != 0);
}
static bool
@@ -687,11 +741,13 @@ pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid)
if (aer == NULL)
return (-1);
-	sc->aer_count++;
/* Save the Command ID for use in the completion message */
aer->cid = cid;
pthread_mutex_lock(&sc->aer_mtx);
+	sc->aer_count++;
STAILQ_INSERT_TAIL(&sc->aer_list, aer, link);
pthread_mutex_unlock(&sc->aer_mtx);
return (0);
}
@@ -707,15 +763,208 @@ pci_nvme_aer_get(struct pci_nvme_softc *sc)
{
struct pci_nvme_aer *aer = NULL;
pthread_mutex_lock(&sc->aer_mtx);
aer = STAILQ_FIRST(&sc->aer_list);
if (aer != NULL) {
STAILQ_REMOVE_HEAD(&sc->aer_list, link);
sc->aer_count--;
}
pthread_mutex_unlock(&sc->aer_mtx);
return (aer);
}
static void
pci_nvme_aen_reset(struct pci_nvme_softc *sc)
{
uint32_t atype;
memset(sc->aen, 0, PCI_NVME_AE_TYPE_MAX * sizeof(struct pci_nvme_aen));
for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) {
sc->aen[atype].atype = atype;
}
}
static void
pci_nvme_aen_init(struct pci_nvme_softc *sc)
{
char nstr[80];
pci_nvme_aen_reset(sc);
	pthread_mutex_init(&sc->aen_mtx, NULL);
	pthread_cond_init(&sc->aen_cond, NULL);
	pthread_create(&sc->aen_tid, NULL, aen_thr, sc);
snprintf(nstr, sizeof(nstr), "nvme-aen-%d:%d", sc->nsc_pi->pi_slot,
sc->nsc_pi->pi_func);
pthread_set_name_np(sc->aen_tid, nstr);
}
static void
pci_nvme_aen_destroy(struct pci_nvme_softc *sc)
{
pci_nvme_aen_reset(sc);
}
/* Notify the AEN thread of pending work */
static void
pci_nvme_aen_notify(struct pci_nvme_softc *sc)
{
pthread_cond_signal(&sc->aen_cond);
}
/*
* Post an Asynchronous Event Notification
*/
static int32_t
pci_nvme_aen_post(struct pci_nvme_softc *sc, pci_nvme_async_type atype,
uint32_t event_data)
{
struct pci_nvme_aen *aen;
if (atype >= PCI_NVME_AE_TYPE_MAX) {
		return (EINVAL);
}
pthread_mutex_lock(&sc->aen_mtx);
aen = &sc->aen[atype];
/* Has the controller already posted an event of this type? */
if (aen->posted) {
pthread_mutex_unlock(&sc->aen_mtx);
		return (EALREADY);
}
aen->event_data = event_data;
aen->posted = true;
pthread_mutex_unlock(&sc->aen_mtx);
pci_nvme_aen_notify(sc);
	return (0);
}
static void
pci_nvme_aen_process(struct pci_nvme_softc *sc)
{
struct pci_nvme_aer *aer;
struct pci_nvme_aen *aen;
pci_nvme_async_type atype;
uint32_t mask;
uint16_t status;
uint8_t lid;
assert(pthread_mutex_isowned_np(&sc->aen_mtx));
for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) {
aen = &sc->aen[atype];
		/* Previous iterations may have depleted the available AERs */
if (!pci_nvme_aer_available(sc)) {
DPRINTF("%s: no AER", __func__);
break;
}
if (!aen->posted) {
DPRINTF("%s: no AEN posted for atype=%#x", __func__, atype);
continue;
}
status = NVME_SC_SUCCESS;
/* Is the event masked? */
mask =
sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11;
DPRINTF("%s: atype=%#x mask=%#x event_data=%#x", __func__, atype, mask, aen->event_data);
switch (atype) {
case PCI_NVME_AE_TYPE_ERROR:
lid = NVME_LOG_ERROR;
break;
case PCI_NVME_AE_TYPE_SMART:
mask &= 0xff;
if ((mask & aen->event_data) == 0)
continue;
lid = NVME_LOG_HEALTH_INFORMATION;
break;
case PCI_NVME_AE_TYPE_NOTICE:
if (aen->event_data >= PCI_NVME_AE_INFO_MAX) {
EPRINTLN("%s unknown AEN notice type %u",
__func__, aen->event_data);
status = NVME_SC_INTERNAL_DEVICE_ERROR;
break;
}
mask >>= 8;
if (((1 << aen->event_data) & mask) == 0)
continue;
switch (aen->event_data) {
case PCI_NVME_AE_INFO_NS_ATTR_CHANGED:
lid = NVME_LOG_CHANGED_NAMESPACE;
break;
case PCI_NVME_AE_INFO_FW_ACTIVATION:
lid = NVME_LOG_FIRMWARE_SLOT;
break;
case PCI_NVME_AE_INFO_TELEMETRY_CHANGE:
lid = NVME_LOG_TELEMETRY_CONTROLLER_INITIATED;
break;
case PCI_NVME_AE_INFO_ANA_CHANGE:
			lid = NVME_LOG_ASYMMETRIC_NAMESPAVE_ACCESS; /* XXX: spelling matches nvme.h */
break;
case PCI_NVME_AE_INFO_PREDICT_LATENCY_CHANGE:
lid = NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE;
break;
case PCI_NVME_AE_INFO_LBA_STATUS_ALERT:
lid = NVME_LOG_LBA_STATUS_INFORMATION;
break;
case PCI_NVME_AE_INFO_ENDURANCE_GROUP_CHANGE:
lid = NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE;
break;
default:
lid = 0;
}
break;
default:
		/* Unrecognized event type */
EPRINTLN("%s unknown AEN type %u", __func__, atype);
status = NVME_SC_INTERNAL_DEVICE_ERROR;
break;
}
aer = pci_nvme_aer_get(sc);
assert(aer != NULL);
DPRINTF("%s: CID=%#x CDW0=%#x", __func__, aer->cid, (lid << 16) | (aen->event_data << 8) | atype);
pci_nvme_cq_update(sc, &sc->compl_queues[0],
(lid << 16) | (aen->event_data << 8) | atype, /* cdw0 */
aer->cid,
0, /* SQID */
status);
aen->event_data = 0;
aen->posted = false;
pci_generate_msix(sc->nsc_pi, 0);
}
}
static void *
aen_thr(void *arg)
{
struct pci_nvme_softc *sc;
sc = arg;
pthread_mutex_lock(&sc->aen_mtx);
for (;;) {
pci_nvme_aen_process(sc);
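
		/* Sleep until pci_nvme_aen_notify() signals more work */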
pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx);
}
pthread_mutex_unlock(&sc->aen_mtx);
pthread_exit(NULL);
return (NULL);
}
static void
pci_nvme_reset_locked(struct pci_nvme_softc *sc)
{
@@ -756,6 +1005,7 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc)
sc->num_q_is_set = false;
pci_nvme_aer_destroy(sc);
pci_nvme_aen_destroy(sc);
}
static void
@@ -1406,8 +1656,13 @@ nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command,
if (feat->set)
feat->set(sc, feat, command, compl);
	DPRINTF("%s: status=%#x cdw11=%#x", __func__, compl->status, command->cdw11);

-	if (compl->status == NVME_SC_SUCCESS)
+	if (compl->status == NVME_SC_SUCCESS) {
feat->cdw11 = command->cdw11;
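
		/*
		 * Newly enabled event classes may make an already
		 * posted AEN deliverable; wake the AEN thread.
		 */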
if ((fid == NVME_FEAT_ASYNC_EVENT_CONFIGURATION) &&
(command->cdw11 != 0))
pci_nvme_aen_notify(sc);
}
return (0);
}
@@ -1525,7 +1780,8 @@ static int
nvme_opc_async_event_req(struct pci_nvme_softc* sc,
struct nvme_command* command, struct nvme_completion* compl)
{
DPRINTF("%s async event request 0x%x", __func__, command->cdw11);
DPRINTF("%s async event request count=%u aerl=%u cid=%#x", __func__,
sc->aer_count, sc->ctrldata.aerl, command->cid);
/* Don't exceed the Async Event Request Limit (AERL). */
if (pci_nvme_aer_limit_reached(sc)) {
@@ -1546,6 +1802,7 @@ nvme_opc_async_event_req(struct pci_nvme_softc* sc,
* there is an event reflective of the request to get event.
*/
compl->status = NVME_NO_STATUS;
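
	/* A previously posted AEN may now have an AER to pair with */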
pci_nvme_aen_notify(sc);
return (0);
}
@@ -2025,6 +2282,7 @@ nvme_opc_write_read(struct pci_nvme_softc *sc,
lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
nblocks = (cmd->cdw12 & 0xFFFF) + 1;
if (pci_nvme_out_of_range(nvstore, lba, nblocks)) {
WPRINTF("%s command would exceed LBA range", __func__);
pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
@@ -2789,6 +3047,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
pci_nvme_init_features(sc);
pci_nvme_aer_init(sc);
pci_nvme_aen_init(sc);
pci_nvme_reset(sc);