net/mlx5: replace countdown with threshold for Tx completions
Replacing the variable countdown (which depends on the number of descriptors)
with a fixed relative threshold known at compile time improves performance by
reducing the TX queue structure footprint and the amount of code to manage
completions during a burst. Completions are now requested at most once per
burst, once the threshold is reached.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Signed-off-by: Vasily Philipov <vasilyf@mellanox.com>
commit c305090bba
parent 99c12dcca6
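In outline: the burst path accumulates the number of descriptors posted since
the last completion request and, at most once per burst, arms a completion on
the burst's final WQE when the threshold is crossed. A condensed sketch of
that logic, lifted from the mlx5_tx_burst() hunk below; struct txq_sketch,
struct wqe_ctrl_sketch and request_completion_if_needed() are illustrative
stand-ins, not driver types:

#include <stdint.h>
#include <arpa/inet.h> /* htonl() */

#define MLX5_TX_COMP_THRESH 32 /* from mlx5_defs.h below */

/* Illustrative stand-ins for the txq fields and WQE control segment
 * this mechanism touches; the real definitions live in mlx5_rxtx.h. */
struct txq_sketch {
	uint16_t elts_head; /* Current index in (*elts)[]. */
	uint16_t elts_comp; /* Descriptors since last completion request. */
};

struct wqe_ctrl_sketch {
	uint32_t data[4];
};

/* Called once at the end of a burst that posted "i" descriptors,
 * "last" being the control segment of the burst's final WQE. */
static void
request_completion_if_needed(struct txq_sketch *txq,
			     struct wqe_ctrl_sketch *last, unsigned int i)
{
	unsigned int comp = txq->elts_comp + i;

	if (comp >= MLX5_TX_COMP_THRESH) {
		/* Arm a completion report on this WQE only. */
		last->data[2] = htonl(8);
		/* Save elts_head in the otherwise unused "immediate"
		 * field so txq_complete() can recover it later. */
		last->data[3] = txq->elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
}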
drivers/net/mlx5/mlx5_defs.h
@@ -48,8 +48,11 @@
 /* Maximum number of special flows. */
 #define MLX5_MAX_SPECIAL_FLOWS 4
 
-/* Request send completion once in every 64 sends, might be less. */
-#define MLX5_PMD_TX_PER_COMP_REQ 64
+/*
+ * Request TX completion every time descriptors reach this threshold since
+ * the previous request. Must be a power of two for performance reasons.
+ */
+#define MLX5_TX_COMP_THRESH 32
 
 /* RSS Indirection table size. */
 #define RSS_INDIRECTION_TABLE_SIZE 256
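A power of two keeps the divisions this constant feeds (such as the CQ sizing
in mlx5_txq.c below) cheap, since the compiler can reduce them to shifts. A
trivial illustration, not driver code:

#include <assert.h>

#define MLX5_TX_COMP_THRESH 32

int
main(void)
{
	unsigned int desc = 512;

	/* A power-of-two divisor lets the compiler emit a shift:
	 * 512 / 32 == 512 >> 5 == 16. */
	assert(desc / MLX5_TX_COMP_THRESH == desc >> 5);
	return 0;
}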
drivers/net/mlx5/mlx5_rxtx.c
@@ -156,9 +156,6 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe,
  * Manage TX completions.
  *
  * When sending a burst, mlx5_tx_burst() posts several WRs.
- * To improve performance, a completion event is only required once every
- * MLX5_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information
- * for other WRs, but this information would not be used anyway.
  *
  * @param txq
  *   Pointer to TX queue structure.
@@ -172,14 +169,16 @@ txq_complete(struct txq *txq)
 	uint16_t elts_free = txq->elts_tail;
 	uint16_t elts_tail;
 	uint16_t cq_ci = txq->cq_ci;
-	unsigned int wqe_ci = (unsigned int)-1;
+	volatile struct mlx5_cqe64 *cqe = NULL;
+	volatile union mlx5_wqe *wqe;
 
 	do {
-		unsigned int idx = cq_ci & cqe_cnt;
-		volatile struct mlx5_cqe64 *cqe = &(*txq->cqes)[idx].cqe64;
+		volatile struct mlx5_cqe64 *tmp;
 
-		if (check_cqe64(cqe, cqe_n, cq_ci) == 1)
+		tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
+		if (check_cqe64(tmp, cqe_n, cq_ci))
 			break;
+		cqe = tmp;
 #ifndef NDEBUG
 		if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
 			if (!check_cqe64_seen(cqe))
@@ -193,14 +192,15 @@ txq_complete(struct txq *txq)
 				return;
 		}
 #endif /* NDEBUG */
-		wqe_ci = ntohs(cqe->wqe_counter);
 		++cq_ci;
 	} while (1);
-	if (unlikely(wqe_ci == (unsigned int)-1))
+	if (unlikely(cqe == NULL))
 		return;
+	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)];
+	elts_tail = wqe->wqe.ctrl.data[3];
+	assert(elts_tail < txq->wqe_n);
 	/* Free buffers. */
-	elts_tail = (wqe_ci + 1) & (elts_n - 1);
-	do {
+	while (elts_free != elts_tail) {
 		struct rte_mbuf *elt = (*txq->elts)[elts_free];
 		unsigned int elts_free_next =
 			(elts_free + 1) & (elts_n - 1);
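This is the consumer half of the new scheme: the CQE's wqe_counter leads back
to the WQE that requested the completion, whose spare "immediate" slot still
holds the elts_head value saved at post time, and that value becomes the new
elts_tail. A free-standing model of the round trip (WQE_N, wqe_imm and the
index values are illustrative assumptions):

#include <assert.h>
#include <stdint.h>

#define WQE_N 256 /* illustrative ring size, a power of two */

/* Stand-in for the "immediate" slot of each WQE control segment. */
static uint32_t wqe_imm[WQE_N];

int
main(void)
{
	uint16_t elts_head = 100;  /* producer index at request time */
	uint16_t wqe_counter = 42; /* WQE that carried the request */

	/* TX side: stash elts_head in the WQE (ctrl.data[3] above). */
	wqe_imm[wqe_counter & (WQE_N - 1)] = elts_head;

	/* Completion side: the CQE echoes wqe_counter, leading back to
	 * the saved index; every element before it is now complete. */
	uint16_t elts_tail = wqe_imm[wqe_counter & (WQE_N - 1)];

	assert(elts_tail == 100);
	return 0;
}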
@@ -216,7 +216,7 @@ txq_complete(struct txq *txq)
 		/* Only one segment needs to be freed. */
 		rte_pktmbuf_free_seg(elt);
 		elts_free = elts_free_next;
-	} while (elts_free != elts_tail);
+	}
 	txq->cq_ci = cq_ci;
 	txq->elts_tail = elts_tail;
 	/* Update the consumer index. */
@@ -437,6 +437,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	const unsigned int elts_n = txq->elts_n;
 	unsigned int i;
 	unsigned int max;
+	unsigned int comp;
 	volatile union mlx5_wqe *wqe;
 	struct rte_mbuf *buf;
 
@@ -486,13 +487,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 					       buf->vlan_tci);
 		else
 			mlx5_wqe_write(txq, wqe, addr, length, lkey);
-		/* Request completion if needed. */
-		if (unlikely(--txq->elts_comp == 0)) {
-			wqe->wqe.ctrl.data[2] = htonl(8);
-			txq->elts_comp = txq->elts_comp_cd_init;
-		} else {
-			wqe->wqe.ctrl.data[2] = 0;
-		}
+		wqe->wqe.ctrl.data[2] = 0;
 		/* Should we enable HW CKSUM offload */
 		if (buf->ol_flags &
 		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
@@ -512,6 +507,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Take a shortcut if nothing must be sent. */
 	if (unlikely(i == 0))
 		return 0;
+	/* Check whether completion threshold has been reached. */
+	comp = txq->elts_comp + i;
+	if (comp >= MLX5_TX_COMP_THRESH) {
+		/* Request completion on last WQE. */
+		wqe->wqe.ctrl.data[2] = htonl(8);
+		/* Save elts_head in unused "immediate" field of WQE. */
+		wqe->wqe.ctrl.data[3] = elts_head;
+		txq->elts_comp = 0;
+	} else {
+		txq->elts_comp = comp;
+	}
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	/* Increment sent packets counter. */
 	txq->stats.opackets += i;
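Because elts_comp persists in struct txq, short bursts accumulate toward the
threshold across calls instead of paying a per-descriptor branch. A runnable
toy model of that accumulation (the 12-packet bursts are illustrative):

#include <stdio.h>

#define MLX5_TX_COMP_THRESH 32

int
main(void)
{
	unsigned int elts_comp = 0;
	int burst;

	/* Three bursts of 12 packets each. */
	for (burst = 0; burst < 3; burst++) {
		unsigned int comp = elts_comp + 12;

		if (comp >= MLX5_TX_COMP_THRESH) {
			printf("burst %d: completion requested\n", burst);
			elts_comp = 0;
		} else {
			elts_comp = comp;
		}
	}
	/* Prints once, on burst 2: 12 -> 24 -> 36 >= 32. */
	return 0;
}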
drivers/net/mlx5/mlx5_rxtx.h
@@ -238,8 +238,7 @@ struct hash_rxq {
 struct txq {
 	uint16_t elts_head; /* Current index in (*elts)[]. */
 	uint16_t elts_tail; /* First element awaiting completion. */
-	uint16_t elts_comp_cd_init; /* Initial value for countdown. */
-	uint16_t elts_comp; /* Elements before asking a completion. */
+	uint16_t elts_comp; /* Counter since last completion request. */
 	uint16_t elts_n; /* (*elts)[] length. */
 	uint16_t cq_ci; /* Consumer index for completion queue. */
 	uint16_t cqe_n; /* Number of CQ elements. */
@@ -247,6 +246,7 @@ struct txq {
 	uint16_t wqe_n; /* Number of WQ elements. */
 	uint16_t bf_offset; /* Blueflame offset. */
 	uint16_t bf_buf_size; /* Blueflame size. */
+	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
 	volatile union mlx5_wqe (*wqes)[]; /* Work queue. */
 	volatile uint32_t *qp_db; /* Work queue doorbell. */
@@ -259,7 +259,6 @@ struct txq {
 	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
-	uint32_t qp_num_8s; /* QP number shifted by 8. */
 } __rte_cache_aligned;
 
 /* TX queue control descriptor. */
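With elts_comp_cd_init gone and qp_num_8s moved up beside bf_buf_size, the hot
fields of struct txq pack tighter at the front of the structure; this is the
footprint reduction the commit message refers to. A rough, free-standing check
of that layout reasoning (mini_txq abridges the real structure, and wqe_ci is
assumed from context since the hunks above do not show it):

#include <stdint.h>

/* Abridged mirror of the new struct txq front. */
struct mini_txq {
	uint16_t elts_head;
	uint16_t elts_tail;
	uint16_t elts_comp;  /* was two uint16_t fields before this patch */
	uint16_t elts_n;
	uint16_t cq_ci;
	uint16_t cqe_n;
	uint16_t wqe_ci;     /* assumed, not shown in the hunks */
	uint16_t wqe_n;
	uint16_t bf_offset;
	uint16_t bf_buf_size;
	uint32_t qp_num_8s;  /* packs right after the uint16_t run */
};

/* The hot TX-path fields fit well within one 64-byte cache line. */
_Static_assert(sizeof(struct mini_txq) <= 64, "txq front fits a cache line");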
drivers/net/mlx5/mlx5_txq.c
@@ -89,6 +89,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 	DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
+	txq_ctrl->txq.elts_comp = 0;
 }
 
 /**
@@ -108,6 +109,7 @@ txq_free_elts(struct txq_ctrl *txq_ctrl)
 	DEBUG("%p: freeing WRs", (void *)txq_ctrl);
 	txq_ctrl->txq.elts_head = 0;
 	txq_ctrl->txq.elts_tail = 0;
+	txq_ctrl->txq.elts_comp = 0;
 
 	while (elts_tail != elts_head) {
 		struct rte_mbuf *elt = (*elts)[elts_tail];
@@ -274,15 +276,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 		goto error;
 	}
 	(void)conf; /* Thresholds configuration (ignored). */
+	assert(desc > MLX5_TX_COMP_THRESH);
 	tmpl.txq.elts_n = desc;
-	/*
-	 * Request send completion every MLX5_PMD_TX_PER_COMP_REQ packets or
-	 * at least 4 times per ring.
-	 */
-	tmpl.txq.elts_comp_cd_init =
-		((MLX5_PMD_TX_PER_COMP_REQ < (desc / 4)) ?
-		 MLX5_PMD_TX_PER_COMP_REQ : (desc / 4));
-	tmpl.txq.elts_comp = tmpl.txq.elts_comp_cd_init;
 	/* MRs will be registered in mp2mr[] later. */
 	attr.rd = (struct ibv_exp_res_domain_init_attr){
 		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
@@ -302,7 +297,8 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 		.res_domain = tmpl.rd,
 	};
 	tmpl.cq = ibv_exp_create_cq(priv->ctx,
-				    (desc / tmpl.txq.elts_comp_cd_init) - 1,
+				    (((desc / MLX5_TX_COMP_THRESH) - 1) ?
+				     ((desc / MLX5_TX_COMP_THRESH) - 1) : 1),
 				    NULL, NULL, 0, &attr.cq);
 	if (tmpl.cq == NULL) {
 		ret = ENOMEM;
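Since at most one CQE is produced per MLX5_TX_COMP_THRESH descriptors, the CQ
only needs desc / MLX5_TX_COMP_THRESH - 1 entries, floored at 1 so small rings
still get a CQE. Worked through with illustrative ring sizes (tx_cq_size() is
a stand-in for the expression above, not a driver function):

#include <assert.h>

#define MLX5_TX_COMP_THRESH 32

/* CQE count requested from ibv_exp_create_cq() for a given ring size. */
static unsigned int
tx_cq_size(unsigned int desc)
{
	return ((desc / MLX5_TX_COMP_THRESH) - 1) ?
	       ((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
}

int
main(void)
{
	assert(tx_cq_size(512) == 15); /* 512/32 - 1 */
	assert(tx_cq_size(64) == 1);   /* 64/32 - 1 == 1 */
	assert(tx_cq_size(33) == 1);   /* 33/32 - 1 == 0, floored to 1 */
	return 0;
}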
@@ -454,6 +450,13 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		return -E_RTE_SECONDARY;
 
 	priv_lock(priv);
+	if (desc <= MLX5_TX_COMP_THRESH) {
+		WARN("%p: number of descriptors requested for TX queue %u"
+		     " must be higher than MLX5_TX_COMP_THRESH, using"
+		     " %u instead of %u",
+		     (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
+		desc = MLX5_TX_COMP_THRESH + 1;
+	}
 	if (!rte_is_power_of_2(desc)) {
 		desc = 1 << log2above(desc);
 		WARN("%p: increased number of descriptors in TX queue %u"
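Combined with the power-of-two rounding in the same function, an undersized
request is bumped past the threshold first and then rounded up, e.g. 16 -> 33
-> 64. A standalone model of the two adjustments (next_pow2() stands in for
the driver's log2above() + shift):

#include <assert.h>

#define MLX5_TX_COMP_THRESH 32

/* Round up to the next power of two. */
static unsigned int
next_pow2(unsigned int v)
{
	unsigned int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int
main(void)
{
	unsigned int desc = 16;

	if (desc <= MLX5_TX_COMP_THRESH)
		desc = MLX5_TX_COMP_THRESH + 1; /* 16 -> 33 */
	desc = next_pow2(desc);                 /* 33 -> 64 */
	assert(desc == 64);
	return 0;
}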