sfxge(4): add the ability to control the event queue performance profile

The event queue type setting is ignored on SFN5xxx/6xxx (aka Siena).

Reviewed by:    gnn
Sponsored by:   Solarflare Communications, Inc.
MFC after:      2 days
Differential Revision:  https://reviews.freebsd.org/D8931
This commit is contained in:
Andrew Rybchenko 2016-12-28 17:45:52 +00:00
parent ed7aec1e45
commit a3fe009ab4
6 changed files with 77 additions and 29 deletions

View File

@ -140,6 +140,7 @@ efx_mcdi_init_evq(
__in size_t nevs,
__in uint32_t irq,
__in uint32_t us,
__in uint32_t flags,
__in boolean_t low_latency)
{
efx_mcdi_req_t req;
@ -178,7 +179,20 @@ efx_mcdi_init_evq(
* So always enable RX and TX event batching, and enable event cut
* through if we want low latency operation.
*/
ev_cut_through = low_latency ? 1 : 0;
switch (flags & EFX_EVQ_FLAGS_TYPE_MASK) {
case EFX_EVQ_FLAGS_TYPE_AUTO:
ev_cut_through = low_latency ? 1 : 0;
break;
case EFX_EVQ_FLAGS_TYPE_THROUGHPUT:
ev_cut_through = 0;
break;
case EFX_EVQ_FLAGS_TYPE_LOW_LATENCY:
ev_cut_through = 1;
break;
default:
rc = EINVAL;
goto fail2;
}
MCDI_IN_POPULATE_DWORD_6(req, INIT_EVQ_IN_FLAGS,
INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
INIT_EVQ_IN_FLAG_RPTR_DOS, 0,
@ -197,7 +211,7 @@ efx_mcdi_init_evq(
unsigned int ticks;
if ((rc = efx_ev_usecs_to_ticks(enp, us, &ticks)) != 0)
goto fail2;
goto fail3;
MCDI_IN_SET_DWORD(req, INIT_EVQ_IN_TMR_MODE,
MC_CMD_INIT_EVQ_IN_TMR_INT_HLDOFF);
@ -225,18 +239,20 @@ efx_mcdi_init_evq(
if (req.emr_rc != 0) {
rc = req.emr_rc;
goto fail3;
goto fail4;
}
if (req.emr_out_length_used < MC_CMD_INIT_EVQ_OUT_LEN) {
rc = EMSGSIZE;
goto fail4;
goto fail5;
}
/* NOTE: ignore the returned IRQ param as firmware does not set it. */
return (0);
fail5:
EFSYS_PROBE(fail5);
fail4:
EFSYS_PROBE(fail4);
fail3:
@ -257,12 +273,14 @@ efx_mcdi_init_evq_v2(
__in efsys_mem_t *esmp,
__in size_t nevs,
__in uint32_t irq,
__in uint32_t us)
__in uint32_t us,
__in uint32_t flags)
{
efx_mcdi_req_t req;
uint8_t payload[
MAX(MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_EVQ_NBUFS(EFX_EVQ_MAXNEVS)),
MC_CMD_INIT_EVQ_V2_OUT_LEN)];
unsigned int evq_type;
efx_qword_t *dma_addr;
uint64_t addr;
int npages;
@ -286,11 +304,25 @@ efx_mcdi_init_evq_v2(
MCDI_IN_SET_DWORD(req, INIT_EVQ_V2_IN_INSTANCE, instance);
MCDI_IN_SET_DWORD(req, INIT_EVQ_V2_IN_IRQ_NUM, irq);
switch (flags & EFX_EVQ_FLAGS_TYPE_MASK) {
case EFX_EVQ_FLAGS_TYPE_AUTO:
evq_type = MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO;
break;
case EFX_EVQ_FLAGS_TYPE_THROUGHPUT:
evq_type = MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_THROUGHPUT;
break;
case EFX_EVQ_FLAGS_TYPE_LOW_LATENCY:
evq_type = MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LOW_LATENCY;
break;
default:
rc = EINVAL;
goto fail2;
}
MCDI_IN_POPULATE_DWORD_4(req, INIT_EVQ_V2_IN_FLAGS,
INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
INIT_EVQ_V2_IN_FLAG_RPTR_DOS, 0,
INIT_EVQ_V2_IN_FLAG_INT_ARMD, 0,
INIT_EVQ_V2_IN_FLAG_TYPE, MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
INIT_EVQ_V2_IN_FLAG_TYPE, evq_type);
/* If the value is zero then disable the timer */
if (us == 0) {
@ -302,7 +334,7 @@ efx_mcdi_init_evq_v2(
unsigned int ticks;
if ((rc = efx_ev_usecs_to_ticks(enp, us, &ticks)) != 0)
goto fail2;
goto fail3;
MCDI_IN_SET_DWORD(req, INIT_EVQ_V2_IN_TMR_MODE,
MC_CMD_INIT_EVQ_V2_IN_TMR_INT_HLDOFF);
@ -330,12 +362,12 @@ efx_mcdi_init_evq_v2(
if (req.emr_rc != 0) {
rc = req.emr_rc;
goto fail3;
goto fail4;
}
if (req.emr_out_length_used < MC_CMD_INIT_EVQ_V2_OUT_LEN) {
rc = EMSGSIZE;
goto fail4;
goto fail5;
}
/* NOTE: ignore the returned IRQ param as firmware does not set it. */
@ -345,6 +377,8 @@ efx_mcdi_init_evq_v2(
return (0);
fail5:
EFSYS_PROBE(fail5);
fail4:
EFSYS_PROBE(fail4);
fail3:
@ -416,6 +450,7 @@ ef10_ev_qcreate(
__in size_t n,
__in uint32_t id,
__in uint32_t us,
__in uint32_t flags,
__in efx_evq_t *eep)
{
efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
@ -459,29 +494,30 @@ ef10_ev_qcreate(
if (encp->enc_init_evq_v2_supported) {
/*
* On Medford the low latency license is required to enable RX
* and event cut through and to disable RX batching. We let the
* firmware decide the settings to use. If the adapter has a low
* latency license, it will choose the best settings for low
* latency, otherwise it choose the best settings for
* throughput.
* and event cut through and to disable RX batching. If event
* queue type in flags is auto, we let the firmware decide the
* settings to use. If the adapter has a low latency license,
* it will choose the best settings for low latency, otherwise
* it will choose the best settings for throughput.
*/
rc = efx_mcdi_init_evq_v2(enp, index, esmp, n, irq, us);
rc = efx_mcdi_init_evq_v2(enp, index, esmp, n, irq, us, flags);
if (rc != 0)
goto fail4;
} else {
/*
* On Huntington we need to specify the settings to use. We
* favour latency if the adapter is running low-latency firmware
* and throughput otherwise, and assume not support RX batching
* implies the adapter is running low-latency firmware. (This
* is how it's been done since Huntington GA. It doesn't make
* much sense with hindsight as the 'low-latency' firmware
* variant is also best for throughput, and does now support RX
* batching).
* On Huntington we need to specify the settings to use.
* If the event queue type in flags is auto, we favour throughput
* if the adapter is running virtualization-supporting firmware
* (i.e. the full-featured firmware variant) and latency
* otherwise. The Ethernet Virtual Bridging capability is used
* to make this decision. (Note though that the low-latency
* firmware variant is also best for throughput, and the
* corresponding type should be specified to choose it.)
*/
boolean_t low_latency = encp->enc_rx_batching_enabled ? 0 : 1;
rc = efx_mcdi_init_evq(enp, index, esmp, n, irq, us,
low_latency);
boolean_t low_latency = encp->enc_datapath_cap_evb ? 0 : 1;
rc = efx_mcdi_init_evq(enp, index, esmp, n, irq, us, flags,
low_latency);
if (rc != 0)
goto fail5;
}

View File

@ -85,6 +85,7 @@ ef10_ev_qcreate(
__in size_t n,
__in uint32_t id,
__in uint32_t us,
__in uint32_t flags,
__in efx_evq_t *eep);
void

View File

@ -1595,6 +1595,11 @@ efx_ev_fini(
#define EFX_EVQ_SIZE(_nevs) ((_nevs) * sizeof (efx_qword_t))
#define EFX_EVQ_NBUFS(_nevs) (EFX_EVQ_SIZE(_nevs) / EFX_BUF_SIZE)
/*
 * Event queue type, carried in the low two bits of the flags argument
 * to efx_ev_qcreate(). Consumers extract it with
 * (flags & EFX_EVQ_FLAGS_TYPE_MASK).
 *
 * AUTO:        with INIT_EVQ_V2 the firmware chooses the settings;
 *              otherwise event cut through follows the caller's
 *              low_latency hint.
 * THROUGHPUT:  event cut through disabled (INIT_EVQ), or the
 *              THROUGHPUT queue type requested (INIT_EVQ_V2).
 * LOW_LATENCY: event cut through enabled (INIT_EVQ), or the
 *              LOW_LATENCY queue type requested (INIT_EVQ_V2).
 *
 * The remaining value within the mask (0x3) is not a defined type; the
 * EF10 MCDI helpers reject it with EINVAL.
 * NOTE(review): the commit message says the type is ignored on Siena -
 * confirm against siena_ev_qcreate().
 */
#define EFX_EVQ_FLAGS_TYPE_MASK (0x3)
#define EFX_EVQ_FLAGS_TYPE_AUTO (0x0)
#define EFX_EVQ_FLAGS_TYPE_THROUGHPUT (0x1)
#define EFX_EVQ_FLAGS_TYPE_LOW_LATENCY (0x2)
extern __checkReturn efx_rc_t
efx_ev_qcreate(
__in efx_nic_t *enp,
@ -1603,6 +1608,7 @@ efx_ev_qcreate(
__in size_t n,
__in uint32_t id,
__in uint32_t us,
__in uint32_t flags,
__deref_out efx_evq_t **eepp);
extern void

View File

@ -71,6 +71,7 @@ siena_ev_qcreate(
__in size_t n,
__in uint32_t id,
__in uint32_t us,
__in uint32_t flags,
__in efx_evq_t *eep);
static void
@ -228,6 +229,7 @@ efx_ev_qcreate(
__in size_t n,
__in uint32_t id,
__in uint32_t us,
__in uint32_t flags,
__deref_out efx_evq_t **eepp)
{
const efx_ev_ops_t *eevop = enp->en_eevop;
@ -264,7 +266,8 @@ efx_ev_qcreate(
enp->en_ev_qcount++;
*eepp = eep;
if ((rc = eevop->eevo_qcreate(enp, index, esmp, n, id, us, eep)) != 0)
if ((rc = eevop->eevo_qcreate(enp, index, esmp, n, id, us, flags,
eep)) != 0)
goto fail2;
return (0);
@ -1279,6 +1282,7 @@ siena_ev_qcreate(
__in size_t n,
__in uint32_t id,
__in uint32_t us,
__in uint32_t flags,
__in efx_evq_t *eep)
{
efx_nic_cfg_t *encp = &(enp->en_nic_cfg);

View File

@ -95,7 +95,7 @@ typedef struct efx_ev_ops_s {
void (*eevo_fini)(efx_nic_t *);
efx_rc_t (*eevo_qcreate)(efx_nic_t *, unsigned int,
efsys_mem_t *, size_t, uint32_t,
uint32_t, efx_evq_t *);
uint32_t, uint32_t, efx_evq_t *);
void (*eevo_qdestroy)(efx_evq_t *);
efx_rc_t (*eevo_qprime)(efx_evq_t *, unsigned int);
void (*eevo_qpost)(efx_evq_t *, uint16_t);

View File

@ -703,7 +703,8 @@ sfxge_ev_qstart(struct sfxge_softc *sc, unsigned int index)
/* Create the common code event queue. */
if ((rc = efx_ev_qcreate(sc->enp, index, esmp, evq->entries,
evq->buf_base_id, sc->ev_moderation, &evq->common)) != 0)
evq->buf_base_id, sc->ev_moderation, EFX_EVQ_FLAGS_TYPE_AUTO,
&evq->common)) != 0)
goto fail;
SFXGE_EVQ_LOCK(evq);