sfxge: Separate software Tx queue limit for non-TCP traffic
Add a separate software Tx queue limit for non-TCP traffic. This allows the total limit to be made higher and avoids local drops of TCP packets caused by the lack of backpressure. There is no point in making the non-TCP limit high: without backpressure, a UDP stream easily overflows any sensible limit.

Split the early-drop statistics, since a separate counter for each drop reason keeps them unambiguous.

Add a software Tx queue high watermark. This information is very useful for understanding how large the queues grow under traffic load.

Sponsored by:	Solarflare Communications, Inc.
Approved by:	gnn (mentor)
commit 93929f253d
parent d9e49c8352
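The core idea can be summarized in a small self-contained model. This is a sketch only: struct dpl_model, dpl_try_enqueue() and the main() driver below are invented names for illustration, and the watermark update is simplified (the driver updates it when draining the list); the real logic lives in sfxge_tx_qdpl_put() and sfxge_tx_qdpl_drain() in the diff that follows. Every packet counts against the large overall get-list limit, while non-TCP packets additionally count against a much smaller limit, so a UDP flood hits ENOBUFS early and cannot crowd out TCP traffic.

/*
 * Simplified model of the dual get-list limit added by this commit.
 * Names are invented for illustration; see sfxge_tx_qdpl_put() below.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct dpl_model {
	unsigned int get_count;          /* all packets on the get-list */
	unsigned int get_non_tcp_count;  /* non-TCP packets on the get-list */
	unsigned int get_max;            /* overall limit (large, e.g. 64K) */
	unsigned int get_non_tcp_max;    /* non-TCP limit (small, e.g. 1K) */
	unsigned int get_hiwat;          /* high watermark of get_count */
};

/* Returns 0 on success or ENOBUFS if the packet must be dropped. */
static int
dpl_try_enqueue(struct dpl_model *dpl, bool is_tcp)
{
	if (dpl->get_count >= dpl->get_max)
		return (ENOBUFS);	/* counted as tx_get_overflow */
	if (!is_tcp && dpl->get_non_tcp_count >= dpl->get_non_tcp_max)
		return (ENOBUFS);	/* counted as tx_get_non_tcp_overflow */
	if (!is_tcp)
		dpl->get_non_tcp_count++;
	dpl->get_count++;
	/* Track queue depth; the driver does this in the drain path. */
	if (dpl->get_count > dpl->get_hiwat)
		dpl->get_hiwat = dpl->get_count;
	return (0);
}

int
main(void)
{
	struct dpl_model dpl = { 0, 0, 64 * 1024, 1024, 0 };
	unsigned int udp_queued = 0;
	int tcp_rc;

	/* A UDP burst stops being queued after the non-TCP limit ... */
	while (dpl_try_enqueue(&dpl, false) == 0)
		udp_queued++;

	/* ... while TCP packets are still accepted. */
	tcp_rc = dpl_try_enqueue(&dpl, true);

	printf("UDP packets queued: %u, next TCP enqueue rc: %d\n",
	    udp_queued, tcp_rc);
	return (0);
}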
@@ -93,10 +93,18 @@ Supported values are: 512, 1024, 2048 and 4096.
 .It Va hw.sfxge.tx_dpl_get_max
 The maximum length of the deferred packet
 .Dq get-list
-for queued transmit
-packets, used only if the transmit queue lock can be acquired.
+for queued transmit packets (TCP and non-TCP), used only if the transmit
+queue lock can be acquired.
 If a packet is dropped, the
-.Va tx_early_drops
+.Va tx_get_overflow
 counter is incremented and the local sender receives ENOBUFS.
 The value must be greater than 0.
+.It Va hw.sfxge.tx_dpl_get_non_tcp_max
+The maximum number of non-TCP packets in the deferred packet
+.Dq get-list
+, used only if the transmit queue lock can be acquired.
+If a packet is dropped, the
+.Va tx_get_non_tcp_overflow
+counter is incremented and the local sender receives ENOBUFS.
+The value must be greater than 0.
 .It Va hw.sfxge.tx_dpl_put_max
@@ -105,7 +113,7 @@ The maximum length of the deferred packet
 for queued transmit
 packets, used if the transmit queue lock cannot be acquired.
 If a packet is dropped, the
-.Va tx_early_drops
+.Va tx_put_overflow
 counter is incremented and the local sender receives ENOBUFS.
 The value must be greater than or equal to 0.
 .It Va hw.sfxge.N.max_rss_channels
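The limits above are exposed as hw.sfxge.* loader tunables (CTLFLAG_RDTUN). As a quick way to confirm the values in effect, a userland program can read them back with sysctlbyname(3); a minimal sketch using only the OID names documented in the manual page hunk above:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int get_max, non_tcp_max, put_max;
	size_t len;

	/* Read-only tunables; set them in /boot/loader.conf at boot. */
	len = sizeof(get_max);
	if (sysctlbyname("hw.sfxge.tx_dpl_get_max", &get_max, &len,
	    NULL, 0) != 0)
		return (1);
	len = sizeof(non_tcp_max);
	if (sysctlbyname("hw.sfxge.tx_dpl_get_non_tcp_max", &non_tcp_max, &len,
	    NULL, 0) != 0)
		return (1);
	len = sizeof(put_max);
	if (sysctlbyname("hw.sfxge.tx_dpl_put_max", &put_max, &len,
	    NULL, 0) != 0)
		return (1);

	printf("get-list max: %d (non-TCP: %d), put-list max: %d\n",
	    get_max, non_tcp_max, put_max);
	return (0);
}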
@@ -85,14 +85,23 @@ static int sfxge_tx_dpl_get_max = SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT;
 TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max);
 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN,
 	   &sfxge_tx_dpl_get_max, 0,
-	   "Maximum number of packets in deferred packet get-list");
+	   "Maximum number of any packets in deferred packet get-list");
 
+#define	SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \
+	SFXGE_PARAM(tx_dpl_get_non_tcp_max)
+static int sfxge_tx_dpl_get_non_tcp_max =
+	SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT;
+TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max);
+SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN,
+	   &sfxge_tx_dpl_get_non_tcp_max, 0,
+	   "Maximum number of non-TCP packets in deferred packet get-list");
+
 #define	SFXGE_PARAM_TX_DPL_PUT_MAX	SFXGE_PARAM(tx_dpl_put_max)
 static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT;
 TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max);
 SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN,
 	   &sfxge_tx_dpl_put_max, 0,
-	   "Maximum number of packets in deferred packet put-list");
+	   "Maximum number of any packets in deferred packet put-list");
 
 #endif
 
@@ -147,6 +156,15 @@ sfxge_tx_qcomplete(struct sfxge_txq *txq, struct sfxge_evq *evq)
 
 #ifdef SFXGE_HAVE_MQ
 
+static inline unsigned int
+sfxge_is_mbuf_non_tcp(struct mbuf *mbuf)
+{
+	/* Absence of TCP checksum flags does not mean that it is non-TCP,
+	 * but it should be true if the user wants to achieve high throughput.
+	 */
+	return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)));
+}
+
 /*
  * Reorder the put list and append it to the get list.
  */
@@ -158,6 +176,7 @@ sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
 	volatile uintptr_t *putp;
 	uintptr_t put;
 	unsigned int count;
+	unsigned int non_tcp_count;
 
 	mtx_assert(&txq->lock, MA_OWNED);
 
@@ -176,9 +195,11 @@ sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
 	get_next = NULL;
 
 	count = 0;
+	non_tcp_count = 0;
 	do {
 		struct mbuf *put_next;
 
+		non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf);
 		put_next = mbuf->m_nextpkt;
 		mbuf->m_nextpkt = get_next;
 		get_next = mbuf;
@@ -192,6 +213,7 @@ sfxge_tx_qdpl_swizzle(struct sfxge_txq *txq)
 	*stdp->std_getp = get_next;
 	stdp->std_getp = get_tailp;
 	stdp->std_get_count += count;
+	stdp->std_get_non_tcp_count += non_tcp_count;
 }
 
 #endif /* SFXGE_HAVE_MQ */
@@ -382,6 +404,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
 	struct sfxge_tx_dpl *stdp;
 	struct mbuf *mbuf, *next;
 	unsigned int count;
+	unsigned int non_tcp_count;
 	unsigned int pushed;
 	int rc;
 
@@ -396,6 +419,10 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
 
 	mbuf = stdp->std_get;
 	count = stdp->std_get_count;
+	non_tcp_count = stdp->std_get_non_tcp_count;
 
+	if (count > stdp->std_get_hiwat)
+		stdp->std_get_hiwat = count;
+
 	while (count != 0) {
 		KASSERT(mbuf != NULL, ("mbuf == NULL"));
@@ -410,6 +437,7 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
 
 		rc = sfxge_tx_queue_mbuf(txq, mbuf);
 		--count;
+		non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf);
 		mbuf = next;
 		if (rc != 0)
 			continue;
@@ -426,12 +454,16 @@ sfxge_tx_qdpl_drain(struct sfxge_txq *txq)
 
 	if (count == 0) {
 		KASSERT(mbuf == NULL, ("mbuf != NULL"));
+		KASSERT(non_tcp_count == 0,
+		    ("inconsistent TCP/non-TCP detection"));
 		stdp->std_get = NULL;
 		stdp->std_get_count = 0;
+		stdp->std_get_non_tcp_count = 0;
 		stdp->std_getp = &stdp->std_get;
 	} else {
 		stdp->std_get = mbuf;
 		stdp->std_get_count = count;
+		stdp->std_get_non_tcp_count = non_tcp_count;
 	}
 
 	if (txq->added != pushed)
@@ -491,8 +523,18 @@ sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked)
 
 		sfxge_tx_qdpl_swizzle(txq);
 
-		if (stdp->std_get_count >= stdp->std_get_max)
+		if (stdp->std_get_count >= stdp->std_get_max) {
+			txq->get_overflow++;
 			return (ENOBUFS);
+		}
+		if (sfxge_is_mbuf_non_tcp(mbuf)) {
+			if (stdp->std_get_non_tcp_count >=
+			    stdp->std_get_non_tcp_max) {
+				txq->get_non_tcp_overflow++;
+				return (ENOBUFS);
+			}
+			stdp->std_get_non_tcp_count++;
+		}
 
 		*(stdp->std_getp) = mbuf;
 		stdp->std_getp = &mbuf->m_nextpkt;
@@ -513,8 +555,10 @@ sfxge_tx_qdpl_put(struct sfxge_txq *txq, struct mbuf *mbuf, int locked)
 				old_len = mp->m_pkthdr.csum_data;
 			} else
 				old_len = 0;
-			if (old_len >= stdp->std_put_max)
+			if (old_len >= stdp->std_put_max) {
+				atomic_add_long(&txq->put_overflow, 1);
 				return (ENOBUFS);
+			}
 			mbuf->m_pkthdr.csum_data = old_len + 1;
 			mbuf->m_nextpkt = (void *)old;
 		} while (atomic_cmpset_ptr(putp, old, new) == 0);
@@ -535,6 +579,7 @@ sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
 
 	if (!SFXGE_LINK_UP(txq->sc)) {
 		rc = ENETDOWN;
+		atomic_add_long(&txq->netdown_drops, 1);
 		goto fail;
 	}
 
@@ -572,7 +617,6 @@ sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m)
 
 fail:
 	m_freem(m);
-	atomic_add_long(&txq->early_drops, 1);
 	return (rc);
 }
 
@@ -591,6 +635,7 @@ sfxge_tx_qdpl_flush(struct sfxge_txq *txq)
 	}
 	stdp->std_get = NULL;
 	stdp->std_get_count = 0;
+	stdp->std_get_non_tcp_count = 0;
 	stdp->std_getp = &stdp->std_get;
 
 	mtx_unlock(&txq->lock);
@@ -1402,6 +1447,13 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
 		rc = EINVAL;
 		goto fail_tx_dpl_get_max;
 	}
+	if (sfxge_tx_dpl_get_non_tcp_max <= 0) {
+		log(LOG_ERR, "%s=%d must be greater than 0",
+		    SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX,
+		    sfxge_tx_dpl_get_non_tcp_max);
+		rc = EINVAL;
+		goto fail_tx_dpl_get_max;
+	}
 	if (sfxge_tx_dpl_put_max < 0) {
 		log(LOG_ERR, "%s=%d must be greater or equal to 0",
 		    SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max);
@@ -1413,6 +1465,7 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
 	stdp = &txq->dpl;
 	stdp->std_put_max = sfxge_tx_dpl_put_max;
 	stdp->std_get_max = sfxge_tx_dpl_get_max;
+	stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max;
 	stdp->std_getp = &stdp->std_get;
 
 	mtx_init(&txq->lock, "txq", NULL, MTX_DEF);
@@ -1421,6 +1474,14 @@ sfxge_tx_qinit(struct sfxge_softc *sc, unsigned int txq_index,
 	    SYSCTL_CHILDREN(txq_node), OID_AUTO,
 	    "dpl_get_count", CTLFLAG_RD | CTLFLAG_STATS,
 	    &stdp->std_get_count, 0, "");
+	SYSCTL_ADD_UINT(device_get_sysctl_ctx(sc->dev),
+	    SYSCTL_CHILDREN(txq_node), OID_AUTO,
+	    "dpl_get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS,
+	    &stdp->std_get_non_tcp_count, 0, "");
+	SYSCTL_ADD_UINT(device_get_sysctl_ctx(sc->dev),
+	    SYSCTL_CHILDREN(txq_node), OID_AUTO,
+	    "dpl_get_hiwat", CTLFLAG_RD | CTLFLAG_STATS,
+	    &stdp->std_get_hiwat, 0, "");
 #endif
 
 	txq->type = type;
@@ -1458,7 +1519,10 @@ static const struct {
 	SFXGE_TX_STAT(tso_long_headers, tso_long_headers),
 	SFXGE_TX_STAT(tx_collapses, collapses),
 	SFXGE_TX_STAT(tx_drops, drops),
-	SFXGE_TX_STAT(tx_early_drops, early_drops),
+	SFXGE_TX_STAT(tx_get_overflow, get_overflow),
+	SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow),
+	SFXGE_TX_STAT(tx_put_overflow, put_overflow),
+	SFXGE_TX_STAT(tx_netdown_drops, netdown_drops),
 };
 
 static int
@@ -75,21 +75,29 @@ struct sfxge_tx_mapping {
 	enum sfxge_tx_buf_flags	flags;
 };
 
-#define	SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT		1024
-#define	SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT		64
+#define	SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT		(64 * 1024)
+#define	SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT	1024
+#define	SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT		64
 
 /*
  * Deferred packet list.
  */
 struct sfxge_tx_dpl {
-	unsigned int	std_get_max;		/* Maximum number of packets
+	unsigned int	std_get_max;		/* Maximum number of packets
 						 * in get list */
-	unsigned int	std_put_max;		/* Maximum number of packets
+	unsigned int	std_get_non_tcp_max;	/* Maximum number
+						 * of non-TCP packets
+						 * in get list */
+	unsigned int	std_put_max;		/* Maximum number of packets
 						 * in put list */
-	uintptr_t	std_put;		/* Head of put list. */
-	struct mbuf	*std_get;		/* Head of get list. */
-	struct mbuf	**std_getp;		/* Tail of get list. */
-	unsigned int	std_get_count;		/* Packets in get list. */
+	uintptr_t	std_put;		/* Head of put list. */
+	struct mbuf	*std_get;		/* Head of get list. */
+	struct mbuf	**std_getp;		/* Tail of get list. */
+	unsigned int	std_get_count;		/* Packets in get list. */
+	unsigned int	std_get_non_tcp_count;	/* Non-TCP packets
+						 * in get list */
+	unsigned int	std_get_hiwat;		/* Packets in get list
+						 * high watermark */
 };
 
 
@@ -165,7 +173,10 @@ struct sfxge_txq {
 	unsigned long			tso_long_headers;
 	unsigned long			collapses;
 	unsigned long			drops;
-	unsigned long			early_drops;
+	unsigned long			get_overflow;
+	unsigned long			get_non_tcp_overflow;
+	unsigned long			put_overflow;
+	unsigned long			netdown_drops;
 
 	/* The following fields change more often, and are used mostly
 	 * on the completion path
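The per-queue dpl_get_count, dpl_get_non_tcp_count and dpl_get_hiwat sysctls added in sfxge_tx_qinit() make it easy to watch how deep the get-list grows under load. A minimal polling sketch follows; note that the "dev.sfxge.0.txq0" OID prefix is an assumption about the per-queue sysctl node naming (the diff only shows the leaf names), so adjust it to the actual tree on the target system.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Assumed OID for the first Tx queue of the first adapter. */
	const char *oid = "dev.sfxge.0.txq0.dpl_get_hiwat";
	unsigned int hiwat;
	size_t len;

	for (;;) {
		len = sizeof(hiwat);
		if (sysctlbyname(oid, &hiwat, &len, NULL, 0) != 0) {
			perror("sysctlbyname");
			return (1);
		}
		printf("get-list high watermark: %u packets\n", hiwat);
		sleep(1);
	}
}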