if_ntb: Add Xeon link watchdog register writes

This feature is disabled by default.  To enable it, tune
hw.if_ntb.enable_xeon_watchdog to non-zero.

If enabled, writes an unused NTB register every second to demonstrate to
a hardware watchdog that the NTB device is still alive.  Most machines
with NTB will not need this -- you know who you are.

Sponsored by:	EMC / Isilon Storage Division
This commit is contained in:
Conrad Meyer 2015-11-19 19:53:09 +00:00
parent ed74ab27a7
commit f51a1fe048
3 changed files with 34 additions and 2 deletions

View File

@ -114,6 +114,11 @@ SYSCTL_UINT(_hw_if_ntb, OID_AUTO, max_num_clients, CTLFLAG_RDTUN,
"0 (default) - use all available NTB memory windows; "
"positive integer N - Limit to N memory windows.");
/*
 * Read-only tunable hw.if_ntb.enable_xeon_watchdog.  It is zero unless
 * set by the administrator; when it is non-zero, ntb_transport_probe()
 * starts the periodic watchdog register write.
 */
static unsigned enable_xeon_watchdog;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
&enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
"keep a watchdog from tearing down the NTB link");
STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);
typedef uint32_t ntb_q_idx_t;
@ -227,6 +232,7 @@ struct ntb_transport_ctx {
unsigned qp_count;
volatile bool link_is_up;
struct callout link_work;
struct callout link_watchdog;
struct task link_cleanup;
uint64_t bufsize;
u_char eaddr[ETHER_ADDR_LEN];
@ -268,7 +274,17 @@ enum {
IF_NTB_MW1_SZ_HIGH,
IF_NTB_MW1_SZ_LOW,
IF_NTB_MAX_SPAD,
/*
* Some NTB-using hardware has a watchdog to work around NTB hangs; if
* a register or doorbell isn't written every few seconds, the link is
* torn down. Write an otherwise unused register once per second to
* keep this watchdog from firing.
*/
IF_NTB_WATCHDOG_SPAD = 15
};
/*
 * Build-time check: the watchdog register index must be below both the
 * Xeon and the Atom SPAD counts.
 */
CTASSERT(IF_NTB_WATCHDOG_SPAD < XEON_SPAD_COUNT &&
IF_NTB_WATCHDOG_SPAD < ATOM_SPAD_COUNT);
/*
 * Map queue index 'qp' onto a memory window index by reducing it modulo
 * the mw_count field of the transport context pointed to by 'nt'.
 * Both arguments are parenthesized in the expansion so that arbitrary
 * expressions (e.g. "ctx + 1" or "&array[i]") can be passed safely.
 */
#define QP_TO_MW(nt, qp) ((qp) % (nt)->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES 100
@ -330,6 +346,7 @@ static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
struct ntb_queue_list *from, struct ntb_queue_list *to);
static void create_random_local_eui48(u_char *eaddr);
static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
static void xeon_link_watchdog_hb(void *);
static const struct ntb_ctx_ops ntb_transport_ops = {
.link_event = ntb_transport_event_callback,
@ -562,6 +579,16 @@ ntb_net_event_handler(void *data, enum ntb_link_event status)
/* Transport Init and teardown */
/*
 * Link watchdog heartbeat callout handler.
 *
 * Writes zero to the IF_NTB_WATCHDOG_SPAD register and then re-arms the
 * link_watchdog callout to run this handler again after hz ticks, so the
 * write repeats until the callout is stopped or drained.
 *
 * 'arg' is the struct ntb_transport_ctx that was passed when the callout
 * was armed.
 */
static void
xeon_link_watchdog_hb(void *arg)
{
	struct ntb_transport_ctx *ctx = arg;

	/* Touch the register; the value written is always zero. */
	ntb_spad_write(ctx->ntb, IF_NTB_WATCHDOG_SPAD, 0);

	/* Schedule the next heartbeat. */
	callout_reset(&ctx->link_watchdog, hz, xeon_link_watchdog_hb, ctx);
}
static int
ntb_transport_probe(struct ntb_softc *ntb)
{
@ -611,6 +638,7 @@ ntb_transport_probe(struct ntb_softc *ntb)
}
callout_init(&nt->link_work, 0);
callout_init(&nt->link_watchdog, 0);
TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);
rc = ntb_set_ctx(ntb, nt, &ntb_transport_ops);
@ -622,6 +650,8 @@ ntb_transport_probe(struct ntb_softc *ntb)
ntb_link_event(ntb);
callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
if (enable_xeon_watchdog != 0)
callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
return (0);
err:
@ -640,6 +670,7 @@ ntb_transport_free(struct ntb_transport_ctx *nt)
ntb_transport_link_cleanup(nt);
taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
callout_drain(&nt->link_work);
callout_drain(&nt->link_watchdog);
BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);

View File

@ -103,6 +103,9 @@ uint64_t ntb_db_read(struct ntb_softc *);
void ntb_db_set_mask(struct ntb_softc *, uint64_t bits);
void ntb_peer_db_set(struct ntb_softc *, uint64_t bits);
#define XEON_SPAD_COUNT 16
#define ATOM_SPAD_COUNT 16
/* Hardware owns the low 16 bits of features. */
#define NTB_BAR_SIZE_4K (1 << 0)
#define NTB_SDOORBELL_LOCKUP (1 << 1)

View File

@ -44,7 +44,6 @@
#define XEON_DB_MSIX_VECTOR_COUNT 4
#define XEON_DB_MSIX_VECTOR_SHIFT 5
#define XEON_DB_LINK_BIT (1 << XEON_DB_LINK)
#define XEON_SPAD_COUNT 16
#define XEON_SPCICMD_OFFSET 0x0504
#define XEON_DEVCTRL_OFFSET 0x0598
@ -89,7 +88,6 @@
#define ATOM_DB_COUNT 34
#define ATOM_DB_MSIX_VECTOR_COUNT 34
#define ATOM_DB_MSIX_VECTOR_SHIFT 1
#define ATOM_SPAD_COUNT 16
#define ATOM_SPCICMD_OFFSET 0xb004
#define ATOM_MBAR23_OFFSET 0xb018