mlx5: get rid of the WR structure in Rx queue elements

Removing this structure reduces the size of SG and non-SG RX queue elements
significantly to improve performance.

A nice side effect is that the mbuf pointer is now fully stored in
struct rxq_elt instead of relying on the WR ID data offset hack.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Olga Shern <olgas@mellanox.com>
Signed-off-by: Or Ami <ora@mellanox.com>
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
This commit is contained in:
Adrien Mazarguil 2015-10-30 19:55:05 +01:00 committed by Thomas Monjalon
parent 61bdf1e0e5
commit aa7f63ab35
5 changed files with 98 additions and 132 deletions

View File

@@ -115,24 +115,6 @@ struct priv {
rte_spinlock_t lock; /* Lock for control functions. */
};
/* Work Request ID data type (64 bit). */
typedef union {
struct {
uint32_t id;
uint16_t offset;
} data;
uint64_t raw;
} wr_id_t;
/* Compile-time check. */
static inline void wr_id_t_check(void)
{
wr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))];
(void)check;
(void)wr_id_t_check;
}
/**
* Lock private structure to protect it from concurrent access in the
* control path.

View File

@@ -60,6 +60,7 @@
#endif
#include "mlx5.h"
#include "mlx5_autoconf.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "mlx5_defs.h"
@@ -97,16 +98,10 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
for (i = 0; (i != elts_n); ++i) {
unsigned int j;
struct rxq_elt_sp *elt = &(*elts)[i];
struct ibv_recv_wr *wr = &elt->wr;
struct ibv_sge (*sges)[RTE_DIM(elt->sges)] = &elt->sges;
/* These two arrays must have the same size. */
assert(RTE_DIM(elt->sges) == RTE_DIM(elt->bufs));
/* Configure WR. */
wr->wr_id = i;
wr->next = &(*elts)[(i + 1)].wr;
wr->sg_list = &(*sges)[0];
wr->num_sge = RTE_DIM(*sges);
/* For each SGE (segment). */
for (j = 0; (j != RTE_DIM(elt->bufs)); ++j) {
struct ibv_sge *sge = &(*sges)[j];
@@ -149,8 +144,6 @@ rxq_alloc_elts_sp(struct rxq *rxq, unsigned int elts_n,
assert(sge->length == rte_pktmbuf_tailroom(buf));
}
}
/* The last WR pointer must be NULL. */
(*elts)[(i - 1)].wr.next = NULL;
DEBUG("%p: allocated and configured %u WRs (%zu segments)",
(void *)rxq, elts_n, (elts_n * RTE_DIM((*elts)[0].sges)));
rxq->elts_n = elts_n;
@@ -242,7 +235,6 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
/* For each WR (packet). */
for (i = 0; (i != elts_n); ++i) {
struct rxq_elt *elt = &(*elts)[i];
struct ibv_recv_wr *wr = &elt->wr;
struct ibv_sge *sge = &(*elts)[i].sge;
struct rte_mbuf *buf;
@@ -258,16 +250,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
ret = ENOMEM;
goto error;
}
/* Configure WR. Work request ID contains its own index in
* the elts array and the offset between SGE buffer header and
* its data. */
WR_ID(wr->wr_id).id = i;
WR_ID(wr->wr_id).offset =
(((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) -
(uintptr_t)buf);
wr->next = &(*elts)[(i + 1)].wr;
wr->sg_list = sge;
wr->num_sge = 1;
elt->buf = buf;
/* Headroom is reserved by rte_pktmbuf_alloc(). */
assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);
/* Buffer is supposed to be empty. */
@@ -282,21 +265,7 @@ rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)
sge->lkey = rxq->mr->lkey;
/* Redundant check for tailroom. */
assert(sge->length == rte_pktmbuf_tailroom(buf));
/* Make sure elts index and SGE mbuf pointer can be deduced
* from WR ID. */
if ((WR_ID(wr->wr_id).id != i) ||
((void *)((uintptr_t)sge->addr -
WR_ID(wr->wr_id).offset) != buf)) {
ERROR("%p: cannot store index and offset in WR ID",
(void *)rxq);
sge->addr = 0;
rte_pktmbuf_free(buf);
ret = EOVERFLOW;
goto error;
}
}
/* The last WR pointer must be NULL. */
(*elts)[(i - 1)].wr.next = NULL;
DEBUG("%p: allocated and configured %u single-segment WRs",
(void *)rxq, elts_n);
rxq->elts_n = elts_n;
@@ -309,14 +278,10 @@ error:
assert(pool == NULL);
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
struct rxq_elt *elt = &(*elts)[i];
struct rte_mbuf *buf;
struct rte_mbuf *buf = elt->buf;
if (elt->sge.addr == 0)
continue;
assert(WR_ID(elt->wr.wr_id).id == i);
buf = (void *)((uintptr_t)elt->sge.addr -
WR_ID(elt->wr.wr_id).offset);
rte_pktmbuf_free_seg(buf);
if (buf != NULL)
rte_pktmbuf_free_seg(buf);
}
rte_free(elts);
}
@@ -345,14 +310,10 @@ rxq_free_elts(struct rxq *rxq)
return;
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
struct rxq_elt *elt = &(*elts)[i];
struct rte_mbuf *buf;
struct rte_mbuf *buf = elt->buf;
if (elt->sge.addr == 0)
continue;
assert(WR_ID(elt->wr.wr_id).id == i);
buf = (void *)((uintptr_t)elt->sge.addr -
WR_ID(elt->wr.wr_id).offset);
rte_pktmbuf_free_seg(buf);
if (buf != NULL)
rte_pktmbuf_free_seg(buf);
}
rte_free(elts);
}
@@ -552,7 +513,6 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
struct rte_mbuf **pool;
unsigned int i, k;
struct ibv_exp_qp_attr mod;
struct ibv_recv_wr *bad_wr;
int err;
int parent = (rxq == &priv->rxq_parent);
@@ -670,11 +630,8 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
struct rxq_elt *elt = &(*elts)[i];
struct rte_mbuf *buf = (void *)
((uintptr_t)elt->sge.addr -
WR_ID(elt->wr.wr_id).offset);
struct rte_mbuf *buf = elt->buf;
assert(WR_ID(elt->wr.wr_id).id == i);
pool[k++] = buf;
}
}
@@ -698,17 +655,41 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
rxq->elts_n = 0;
rte_free(rxq->elts.sp);
rxq->elts.sp = NULL;
/* Post WRs. */
err = ibv_post_recv(tmpl.qp,
(tmpl.sp ?
&(*tmpl.elts.sp)[0].wr :
&(*tmpl.elts.no_sp)[0].wr),
&bad_wr);
/* Post SGEs. */
assert(tmpl.if_qp != NULL);
if (tmpl.sp) {
struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
err = tmpl.if_qp->recv_sg_list
(tmpl.qp,
(*elts)[i].sges,
RTE_DIM((*elts)[i].sges));
#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
errno = ENOSYS;
err = -1;
#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
if (err)
break;
}
} else {
struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
err = tmpl.if_qp->recv_burst(
tmpl.qp,
&(*elts)[i].sge,
1);
if (err)
break;
}
}
if (err) {
ERROR("%p: ibv_post_recv() failed for WR %p: %s",
(void *)dev,
(void *)bad_wr,
strerror(err));
ERROR("%p: failed to post SGEs with error %d",
(void *)dev, err);
/* Set err because it does not contain a valid errno value. */
err = EIO;
goto skip_rtr;
}
mod = (struct ibv_exp_qp_attr){
@@ -761,10 +742,10 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
struct ibv_exp_res_domain_init_attr rd;
} attr;
enum ibv_exp_query_intf_status status;
struct ibv_recv_wr *bad_wr;
struct rte_mbuf *buf;
int ret = 0;
int parent = (rxq == &priv->rxq_parent);
unsigned int i;
(void)conf; /* Thresholds configuration (ignored). */
/*
@@ -900,28 +881,7 @@ skip_mr:
(void *)dev, strerror(ret));
goto error;
}
ret = ibv_post_recv(tmpl.qp,
(tmpl.sp ?
&(*tmpl.elts.sp)[0].wr :
&(*tmpl.elts.no_sp)[0].wr),
&bad_wr);
if (ret) {
ERROR("%p: ibv_post_recv() failed for WR %p: %s",
(void *)dev,
(void *)bad_wr,
strerror(ret));
goto error;
}
skip_alloc:
mod = (struct ibv_exp_qp_attr){
.qp_state = IBV_QPS_RTR
};
ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
if (ret) {
ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
(void *)dev, strerror(ret));
goto error;
}
/* Save port ID. */
tmpl.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id);
@@ -947,6 +907,51 @@ skip_alloc:
(void *)dev, status);
goto error;
}
/* Post SGEs. */
if (!parent && tmpl.sp) {
struct rxq_elt_sp (*elts)[tmpl.elts_n] = tmpl.elts.sp;
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
#ifdef HAVE_EXP_QP_BURST_RECV_SG_LIST
ret = tmpl.if_qp->recv_sg_list
(tmpl.qp,
(*elts)[i].sges,
RTE_DIM((*elts)[i].sges));
#else /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
errno = ENOSYS;
ret = -1;
#endif /* HAVE_EXP_QP_BURST_RECV_SG_LIST */
if (ret)
break;
}
} else if (!parent) {
struct rxq_elt (*elts)[tmpl.elts_n] = tmpl.elts.no_sp;
for (i = 0; (i != RTE_DIM(*elts)); ++i) {
ret = tmpl.if_qp->recv_burst(
tmpl.qp,
&(*elts)[i].sge,
1);
if (ret)
break;
}
}
if (ret) {
ERROR("%p: failed to post SGEs with error %d",
(void *)dev, ret);
/* Set ret because it does not contain a valid errno value. */
ret = EIO;
goto error;
}
mod = (struct ibv_exp_qp_attr){
.qp_state = IBV_QPS_RTR
};
ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);
if (ret) {
ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
(void *)dev, strerror(ret));
goto error;
}
/* Clean up rxq in case we're reinitializing it. */
DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq);
rxq_cleanup(rxq);

View File

@@ -612,8 +612,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
return 0;
for (i = 0; (i != pkts_n); ++i) {
struct rxq_elt_sp *elt = &(*elts)[elts_head];
struct ibv_recv_wr *wr = &elt->wr;
uint64_t wr_id = wr->wr_id;
unsigned int len;
unsigned int pkt_buf_len;
struct rte_mbuf *pkt_buf = NULL; /* Buffer returned in pkts. */
@@ -623,12 +621,6 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint32_t flags;
/* Sanity checks. */
#ifdef NDEBUG
(void)wr_id;
#endif
assert(wr_id < rxq->elts_n);
assert(wr->sg_list == elt->sges);
assert(wr->num_sge == RTE_DIM(elt->sges));
assert(elts_head < rxq->elts_n);
assert(rxq->elts_head < rxq->elts_n);
ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
@@ -677,6 +669,7 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
struct rte_mbuf *rep;
unsigned int seg_tailroom;
assert(seg != NULL);
/*
* Fetch initial bytes of packet descriptor into a
* cacheline while allocating rep.
@@ -688,9 +681,8 @@ mlx5_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Unable to allocate a replacement mbuf,
* repost WR.
*/
DEBUG("rxq=%p, wr_id=%" PRIu64 ":"
" can't allocate a new mbuf",
(void *)rxq, wr_id);
DEBUG("rxq=%p: can't allocate a new mbuf",
(void *)rxq);
if (pkt_buf != NULL) {
*pkt_buf_next = NULL;
rte_pktmbuf_free(pkt_buf);
@@ -825,18 +817,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
return mlx5_rx_burst_sp(dpdk_rxq, pkts, pkts_n);
for (i = 0; (i != pkts_n); ++i) {
struct rxq_elt *elt = &(*elts)[elts_head];
struct ibv_recv_wr *wr = &elt->wr;
uint64_t wr_id = wr->wr_id;
unsigned int len;
struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -
WR_ID(wr_id).offset);
struct rte_mbuf *seg = elt->buf;
struct rte_mbuf *rep;
uint32_t flags;
/* Sanity checks. */
assert(WR_ID(wr_id).id < rxq->elts_n);
assert(wr->sg_list == &elt->sge);
assert(wr->num_sge == 1);
assert(seg != NULL);
assert(elts_head < rxq->elts_n);
assert(rxq->elts_head < rxq->elts_n);
/*
@@ -888,9 +875,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Unable to allocate a replacement mbuf,
* repost WR.
*/
DEBUG("rxq=%p, wr_id=%" PRIu32 ":"
" can't allocate a new mbuf",
(void *)rxq, WR_ID(wr_id).id);
DEBUG("rxq=%p: can't allocate a new mbuf",
(void *)rxq);
/* Increment out of memory counters. */
++rxq->stats.rx_nombuf;
++rxq->priv->dev->data->rx_mbuf_alloc_failed;
@@ -900,10 +886,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Reconfigure sge to use rep instead of seg. */
elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;
assert(elt->sge.lkey == rxq->mr->lkey);
WR_ID(wr->wr_id).offset =
(((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) -
(uintptr_t)rep);
assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id);
elt->buf = rep;
/* Add SGE to array for repost. */
sges[i] = elt->sge;

View File

@@ -81,16 +81,14 @@ struct mlx5_txq_stats {
/* RX element (scattered packets). */
struct rxq_elt_sp {
struct ibv_recv_wr wr; /* Work Request. */
struct ibv_sge sges[MLX5_PMD_SGE_WR_N]; /* Scatter/Gather Elements. */
struct rte_mbuf *bufs[MLX5_PMD_SGE_WR_N]; /* SGEs buffers. */
};
/* RX element. */
struct rxq_elt {
struct ibv_recv_wr wr; /* Work Request. */
struct ibv_sge sge; /* Scatter/Gather Element. */
/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */
struct rte_mbuf *buf; /* SGE buffer. */
};
struct priv;

View File

@@ -161,6 +161,4 @@ pmd_drv_log_basename(const char *s)
\
snprintf(name, sizeof(name), __VA_ARGS__)
#define WR_ID(o) (((wr_id_t *)&(o))->data)
#endif /* RTE_PMD_MLX5_UTILS_H_ */