net/mlx5: separate DPDK from Verbs Rx queue objects

Move Verbs objects to their own functions so they can be allocated and
released independently from the DPDK queue. At the same time, a reference
counter is added to help detect issues when a queue is released while
still in use elsewhere (by flows, for instance).

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Author: Nélio Laranjeiro <nelio.laranjeiro@6wind.com>
Date: 2017-10-09 16:44:46 +02:00
Committed by: Ferruh Yigit
Parent: f8fb87d51f
Commit: 09cb5b5817
6 changed files with 461 additions and 305 deletions
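
The core of the change is a reference-counted Verbs object per Rx queue,
managed through the mlx5_priv_rxq_ibv_new()/get()/release()/releasable()/
verify() functions added by this patch. A minimal usage sketch, assuming
the PMD context from the hunks below (the enclosing function is
hypothetical):

/* Pin the Verbs Rx queue object of queue "idx" while using it. */
static void
rxq_ibv_usage_sketch(struct priv *priv, uint16_t idx)
{
	struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, idx);

	if (rxq_ibv == NULL)
		return; /* Queue not configured or no Verbs object yet. */
	/* rxq_ibv->wq and rxq_ibv->cq cannot be destroyed here. */
	if (mlx5_priv_rxq_ibv_release(priv, rxq_ibv) == EBUSY)
		; /* Other references (flows, IRQ vector) keep it alive. */
}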

drivers/net/mlx5/mlx5.c

@@ -253,6 +253,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
priv_socket_uninit(priv);
ret = mlx5_priv_rxq_ibv_verify(priv);
if (ret)
WARN("%p: some Verbs Rx queue still remain", (void *)priv);
ret = priv_flow_verify(priv);
if (ret)
WARN("%p: some flows still remain", (void *)priv);

drivers/net/mlx5/mlx5.h

@@ -147,6 +147,7 @@ struct priv {
struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
@@ -290,7 +291,6 @@ int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
int priv_flow_start(struct priv *);
void priv_flow_stop(struct priv *);
int priv_flow_rxq_in_use(struct priv *, struct mlx5_rxq_data *);
int priv_flow_verify(struct priv *);
/* mlx5_socket.c */

drivers/net/mlx5/mlx5_flow.c

@@ -95,11 +95,11 @@ struct rte_flow {
struct ibv_flow *ibv_flow; /**< Verbs flow. */
struct ibv_wq *wq; /**< Verbs work queue. */
struct ibv_cq *cq; /**< Verbs completion queue. */
uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
uint32_t mark:1; /**< Set if the flow is marked. */
uint32_t drop:1; /**< Drop queue. */
uint64_t hash_fields; /**< Fields that participate in the hash. */
struct mlx5_rxq_data *rxqs[]; /**< Pointer to the queues array. */
uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
uint16_t queues_n; /**< Number of queues in the list. */
};
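
Note the design change above: the flow no longer keeps mlx5_rxq_data
pointers but a bounded list of queue indices, resolved through priv->rxqs
at each use. A sketch of the lookup, as it appears in the hunks below:

/* Resolve the i-th queue of the flow at use time; the flow holds no
 * direct pointer into the queue's memory. */
struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
rxq_data->mark = 1; /* e.g. flagging, as in priv_flow_start() below. */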
/** Static initializer for items. */
@@ -1096,23 +1096,21 @@ priv_flow_create_action_queue(struct priv *priv,
assert(priv->pd);
assert(priv->ctx);
assert(!flow->actions.drop);
rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
sizeof(*rte_flow->rxqs) * flow->actions.queues_n,
0);
rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
if (!rte_flow) {
rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "cannot allocate flow memory");
return NULL;
}
for (i = 0; i < flow->actions.queues_n; ++i) {
struct mlx5_rxq_ctrl *rxq;
struct mlx5_rxq_ibv *rxq_ibv =
mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
rxq = container_of((*priv->rxqs)[flow->actions.queues[i]],
struct mlx5_rxq_ctrl, rxq);
wqs[i] = rxq->wq;
rte_flow->rxqs[i] = &rxq->rxq;
++rte_flow->rxqs_n;
rxq->rxq.mark |= flow->actions.mark;
wqs[i] = rxq_ibv->wq;
rte_flow->queues[i] = flow->actions.queues[i];
++rte_flow->queues_n;
(*priv->rxqs)[flow->actions.queues[i]]->mark |=
flow->actions.mark;
}
/* finalise indirection table. */
for (j = 0; i < wqs_n; ++i, ++j) {
@@ -1290,6 +1288,8 @@ static void
priv_flow_destroy(struct priv *priv,
struct rte_flow *flow)
{
unsigned int i;
TAILQ_REMOVE(&priv->flows, flow, next);
if (flow->ibv_flow)
claim_zero(ibv_destroy_flow(flow->ibv_flow));
@@ -1299,37 +1299,33 @@ priv_flow_destroy(struct priv *priv,
claim_zero(ibv_destroy_qp(flow->qp));
if (flow->ind_table)
claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
if (flow->mark) {
for (i = 0; i != flow->queues_n; ++i) {
struct rte_flow *tmp;
struct mlx5_rxq_data *rxq;
uint32_t mark_n = 0;
uint32_t queue_n;
struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
/*
* To remove the mark from the queue, the queue must not be
* present in any other marked flow (RSS or not).
*/
for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
rxq = flow->rxqs[queue_n];
for (tmp = TAILQ_FIRST(&priv->flows);
tmp;
tmp = TAILQ_NEXT(tmp, next)) {
uint32_t tqueue_n;
if (flow->mark) {
int mark = 0;
TAILQ_FOREACH(tmp, &priv->flows, next) {
unsigned int j;
if (tmp->drop)
continue;
for (tqueue_n = 0;
tqueue_n < tmp->rxqs_n;
++tqueue_n) {
struct mlx5_rxq_data *trxq;
trxq = tmp->rxqs[tqueue_n];
if (rxq == trxq)
++mark_n;
}
if (!tmp->mark)
continue;
for (j = 0; (j != tmp->queues_n) && !mark; j++)
if (tmp->queues[j] == flow->queues[i])
mark = 1;
}
rxq->mark = !!mark_n;
rxq_data->mark = mark;
}
mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
}
free:
rte_free(flow->ibv_attr);
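
Since removed and added lines are interleaved above, here is the
resulting loop of priv_flow_destroy() condensed into a readable sketch
(not a verbatim copy): a queue keeps its mark bit only if another
non-drop, marked flow still lists it, and the flow's reference on the
queue's Verbs object is dropped.

for (i = 0; i != flow->queues_n; ++i) {
	struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);

	if (flow->mark) {
		struct rte_flow *tmp;
		int mark = 0;

		TAILQ_FOREACH(tmp, &priv->flows, next) {
			unsigned int j;

			if (tmp->drop || !tmp->mark)
				continue;
			for (j = 0; (j != tmp->queues_n) && !mark; j++)
				if (tmp->queues[j] == flow->queues[i])
					mark = 1;
		}
		rxq_data->mark = mark;
	}
	/* Drop the reference taken when the flow was created. */
	mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
}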
@@ -1523,8 +1519,8 @@ priv_flow_stop(struct priv *priv)
if (flow->mark) {
unsigned int n;
for (n = 0; n < flow->rxqs_n; ++n)
flow->rxqs[n]->mark = 0;
for (n = 0; n < flow->queues_n; ++n)
(*priv->rxqs)[flow->queues[n]]->mark = 0;
}
DEBUG("Flow %p removed", (void *)flow);
}
@@ -1566,39 +1562,8 @@ priv_flow_start(struct priv *priv)
if (flow->mark) {
unsigned int n;
for (n = 0; n < flow->rxqs_n; ++n)
flow->rxqs[n]->mark = 1;
}
}
return 0;
}
/**
* Verify if the Rx queue is used in a flow.
*
* @param priv
* Pointer to private structure.
* @param rxq
* Pointer to the queue to search.
*
* @return
* Nonzero if the queue is used by a flow.
*/
int
priv_flow_rxq_in_use(struct priv *priv, struct mlx5_rxq_data *rxq)
{
struct rte_flow *flow;
for (flow = TAILQ_FIRST(&priv->flows);
flow;
flow = TAILQ_NEXT(flow, next)) {
unsigned int n;
if (flow->drop)
continue;
for (n = 0; n < flow->rxqs_n; ++n) {
if (flow->rxqs[n] == rxq)
return 1;
for (n = 0; n < flow->queues_n; ++n)
(*priv->rxqs)[flow->queues[n]]->mark = 1;
}
}
return 0;

drivers/net/mlx5/mlx5_rxq.c

@@ -378,7 +378,7 @@ priv_create_hash_rxqs(struct priv *priv)
rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
struct mlx5_rxq_ctrl, rxq);
wqs[i] = rxq_ctrl->wq;
wqs[i] = rxq_ctrl->ibv->wq;
}
/* Get number of hash RX queues to configure. */
for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
@@ -645,8 +645,6 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
/* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
struct rte_mbuf *buf;
volatile struct mlx5_wqe_data_seg *scat =
&(*rxq_ctrl->rxq.wqes)[i];
buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
@@ -667,21 +665,12 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl, unsigned int elts_n)
DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
PKT_LEN(buf) = DATA_LEN(buf);
NB_SEGS(buf) = 1;
/* scat->addr must be able to store a pointer. */
assert(sizeof(scat->addr) >= sizeof(uintptr_t));
*scat = (struct mlx5_wqe_data_seg){
.addr =
rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)),
.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
.lkey = rxq_ctrl->mr->lkey,
};
(*rxq_ctrl->rxq.elts)[i] = buf;
}
if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
assert(rxq->elts_n == rxq->cqe_n);
/* Initialize default rearm_data for vPMD. */
mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
rte_mbuf_refcnt_set(mbuf_init, 1);
@@ -759,75 +748,11 @@ void
mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
{
DEBUG("cleaning up %p", (void *)rxq_ctrl);
rxq_free_elts(rxq_ctrl);
if (rxq_ctrl->wq != NULL)
claim_zero(ibv_destroy_wq(rxq_ctrl->wq));
if (rxq_ctrl->cq != NULL)
claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
if (rxq_ctrl->channel != NULL)
claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
if (rxq_ctrl->mr != NULL)
priv_mr_release(rxq_ctrl->priv, rxq_ctrl->mr);
if (rxq_ctrl->ibv)
mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}
/**
* Initialize RX queue.
*
* @param tmpl
* Pointer to RX queue control template.
*
* @return
* 0 on success, errno value on failure.
*/
static inline int
rxq_setup(struct mlx5_rxq_ctrl *tmpl)
{
struct ibv_cq *ibcq = tmpl->cq;
struct mlx5dv_cq cq_info;
struct mlx5dv_rwq rwq;
const uint16_t desc_n =
(1 << tmpl->rxq.elts_n) + tmpl->priv->rx_vec_en *
MLX5_VPMD_DESCS_PER_LOOP;
struct rte_mbuf *(*elts)[desc_n] =
rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
struct mlx5dv_obj obj;
int ret = 0;
obj.cq.in = ibcq;
obj.cq.out = &cq_info;
obj.rwq.in = tmpl->wq;
obj.rwq.out = &rwq;
ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
if (ret != 0) {
return -EINVAL;
}
if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
"it should be set to %u", RTE_CACHE_LINE_SIZE);
return EINVAL;
}
if (elts == NULL)
return ENOMEM;
tmpl->rxq.rq_db = rwq.dbrec;
tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
tmpl->rxq.cq_ci = 0;
tmpl->rxq.rq_ci = 0;
tmpl->rxq.rq_pi = 0;
tmpl->rxq.cq_db = cq_info.dbrec;
tmpl->rxq.wqes =
(volatile struct mlx5_wqe_data_seg (*)[])
(uintptr_t)rwq.buf;
tmpl->rxq.cqes =
(volatile struct mlx5_cqe (*)[])
(uintptr_t)cq_info.buf;
tmpl->rxq.elts = elts;
tmpl->rxq.cq_uar = cq_info.cq_uar;
tmpl->rxq.cqn = cq_info.cqn;
tmpl->rxq.cq_arm_sn = 0;
return 0;
}
/**
* Configure a RX queue.
*
@@ -853,29 +778,28 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
const uint16_t desc_n =
desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
struct mlx5_rxq_ctrl tmpl = {
.priv = priv,
.socket = socket,
.rxq = {
.elts = rte_calloc_socket("RXQ", 1,
desc_n *
sizeof(struct rte_mbuf *), 0,
socket),
.elts_n = log2above(desc),
.mp = mp,
.rss_hash = priv->rxqs_n > 1,
},
};
struct ibv_wq_attr mod;
union {
struct ibv_cq_init_attr_ex cq;
struct ibv_wq_init_attr wq;
struct ibv_cq_ex cq_attr;
} attr;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
unsigned int cqe_n = desc - 1;
const uint16_t desc_n =
desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
struct rte_mbuf *(*elts)[desc_n] = NULL;
int ret = 0;
(void)conf; /* Thresholds configuration (ignored). */
if (dev->data->dev_conf.intr_conf.rxq)
tmpl.irq = 1;
/* Enable scattered packets support for this queue if necessary. */
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
@@ -928,77 +852,13 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
if (priv->hw_csum_l2tun)
tmpl.rxq.csum_l2tun =
!!dev->data->dev_conf.rxmode.hw_ip_checksum;
/* Use the entire RX mempool as the memory region. */
tmpl.mr = priv_mr_get(priv, mp);
if (tmpl.mr == NULL) {
tmpl.mr = priv_mr_new(priv, mp);
if (tmpl.mr == NULL) {
ret = EINVAL;
ERROR("%p: MR creation failure: %s",
(void *)dev, strerror(ret));
goto error;
}
}
if (dev->data->dev_conf.intr_conf.rxq) {
tmpl.channel = ibv_create_comp_channel(priv->ctx);
if (tmpl.channel == NULL) {
ret = ENOMEM;
ERROR("%p: Rx interrupt completion channel creation"
" failure: %s",
(void *)dev, strerror(ret));
goto error;
}
}
attr.cq = (struct ibv_cq_init_attr_ex){
.comp_mask = 0,
};
if (priv->cqe_comp) {
attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
/*
* For vectorized Rx, it must not be doubled in order to
* make cq_ci and rq_ci aligned.
*/
if (rxq_check_vec_support(&tmpl.rxq) < 0)
cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
}
tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0);
if (tmpl.cq == NULL) {
ret = ENOMEM;
ERROR("%p: CQ creation failure: %s",
(void *)dev, strerror(ret));
goto error;
}
DEBUG("priv->device_attr.max_qp_wr is %d",
priv->device_attr.orig_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.orig_attr.max_sge);
/* Configure VLAN stripping. */
tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
!!dev->data->dev_conf.rxmode.hw_vlan_strip);
attr.wq = (struct ibv_wq_init_attr){
.wq_context = NULL, /* Could be useful in the future. */
.wq_type = IBV_WQT_RQ,
/* Max number of outstanding WRs. */
.max_wr = desc >> tmpl.rxq.sges_n,
/* Max number of scatter/gather elements in a WR. */
.max_sge = 1 << tmpl.rxq.sges_n,
.pd = priv->pd,
.cq = tmpl.cq,
.comp_mask =
IBV_WQ_FLAGS_CVLAN_STRIPPING |
0,
.create_flags = (tmpl.rxq.vlan_strip ?
IBV_WQ_FLAGS_CVLAN_STRIPPING :
0),
};
/* By default, FCS (CRC) is stripped by hardware. */
if (dev->data->dev_conf.rxmode.hw_strip_crc) {
tmpl.rxq.crc_present = 0;
} else if (priv->hw_fcs_strip) {
/* Ask HW/Verbs to leave CRC in place when supported. */
attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
tmpl.rxq.crc_present = 1;
} else {
WARN("%p: CRC stripping has been disabled but will still"
@@ -1013,60 +873,21 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
tmpl.rxq.crc_present ? "disabled" : "enabled",
tmpl.rxq.crc_present << 2);
#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING")) {
; /* Nothing else to do. */
else if (priv->hw_padding) {
} else if (priv->hw_padding) {
INFO("%p: enabling packet padding on queue %p",
(void *)dev, (void *)rxq_ctrl);
attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
} else
} else {
WARN("%p: packet padding has been requested but is not"
" supported, make sure MLNX_OFED and firmware are"
" up to date",
(void *)dev);
}
#endif
tmpl.wq = ibv_create_wq(priv->ctx, &attr.wq);
if (tmpl.wq == NULL) {
ret = (errno ? errno : EINVAL);
ERROR("%p: WQ creation failure: %s",
(void *)dev, strerror(ret));
goto error;
}
/*
* Make sure number of WRs*SGEs match expectations since a queue
* cannot allocate more than "desc" buffers.
*/
if (((int)attr.wq.max_wr != (desc >> tmpl.rxq.sges_n)) ||
((int)attr.wq.max_sge != (1 << tmpl.rxq.sges_n))) {
ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
(void *)dev,
(desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
attr.wq.max_wr, attr.wq.max_sge);
ret = EINVAL;
goto error;
}
/* Save port ID. */
tmpl.rxq.port_id = dev->data->port_id;
DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
/* Change queue state to ready. */
mod = (struct ibv_wq_attr){
.attr_mask = IBV_WQ_ATTR_STATE,
.wq_state = IBV_WQS_RDY,
};
ret = ibv_modify_wq(tmpl.wq, &mod);
if (ret) {
ERROR("%p: WQ state to IBV_WQS_RDY failed: %s",
(void *)dev, strerror(ret));
goto error;
}
ret = rxq_setup(&tmpl);
if (ret) {
ERROR("%p: cannot initialize RX queue structure: %s",
(void *)dev, strerror(ret));
goto error;
}
ret = rxq_alloc_elts(&tmpl, desc);
if (ret) {
ERROR("%p: RXQ allocation failed: %s",
@@ -1085,17 +906,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
rte_free(tmpl.rxq.elts);
tmpl.rxq.elts = elts;
*rxq_ctrl = tmpl;
/* Update doorbell counter. */
rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
rte_wmb();
*rxq_ctrl->rxq.rq_db = rte_cpu_to_be_32(rxq_ctrl->rxq.rq_ci);
DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
assert(ret == 0);
return 0;
error:
elts = tmpl.rxq.elts;
rte_free(tmpl.rxq.elts);
mlx5_rxq_cleanup(&tmpl);
rte_free(elts);
assert(ret > 0);
return ret;
}
@@ -1185,14 +1001,20 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
}
}
ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
if (ret)
if (ret) {
rte_free(rxq_ctrl);
else {
rxq_ctrl->rxq.stats.idx = idx;
DEBUG("%p: adding RX queue %p to list",
(void *)dev, (void *)rxq_ctrl);
(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
goto out;
}
rxq_ctrl->rxq.stats.idx = idx;
DEBUG("%p: adding RX queue %p to list",
(void *)dev, (void *)rxq_ctrl);
(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
if (!rxq_ctrl->ibv) {
ret = EAGAIN;
goto out;
}
out:
priv_unlock(priv);
return -ret;
}
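
Queue setup is now a two-step sequence: build the DPDK control structure,
then instantiate the Verbs objects for it. The interleaved hunk above
condenses to (a sketch of the post-patch flow):

ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
if (ret) {
	rte_free(rxq_ctrl);
	goto out;
}
rxq_ctrl->rxq.stats.idx = idx;
(*priv->rxqs)[idx] = &rxq_ctrl->rxq;
rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, idx);
if (!rxq_ctrl->ibv)
	ret = EAGAIN;
out:
priv_unlock(priv);
return -ret;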
@@ -1219,7 +1041,7 @@ mlx5_rx_queue_release(void *dpdk_rxq)
rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
priv = rxq_ctrl->priv;
priv_lock(priv);
if (priv_flow_rxq_in_use(priv, rxq))
if (!mlx5_priv_rxq_ibv_releasable(priv, rxq_ctrl->ibv))
rte_panic("Rx queue %p is still used by a flow and cannot be"
" removed\n", (void *)rxq_ctrl);
for (i = 0; (i != priv->rxqs_n); ++i)
@@ -1264,15 +1086,14 @@ priv_rx_intr_vec_enable(struct priv *priv)
}
intr_handle->type = RTE_INTR_HANDLE_EXT;
for (i = 0; i != n; ++i) {
struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
/* This rxq ibv must not be released in this function. */
struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
int fd;
int flags;
int rc;
/* Skip queues that cannot request interrupts. */
if (!rxq || !rxq_ctrl->channel) {
if (!rxq_ibv || !rxq_ibv->channel) {
/* Use invalid intr_vec[] index to disable entry. */
intr_handle->intr_vec[i] =
RTE_INTR_VEC_RXTX_OFFSET +
@@ -1286,7 +1107,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
priv_rx_intr_vec_disable(priv);
return -1;
}
fd = rxq_ctrl->channel->fd;
fd = rxq_ibv->channel->fd;
flags = fcntl(fd, F_GETFL);
rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
@@ -1316,7 +1137,27 @@ void
priv_rx_intr_vec_disable(struct priv *priv)
{
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
unsigned int i;
unsigned int rxqs_n = priv->rxqs_n;
unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return;
for (i = 0; i != n; ++i) {
struct mlx5_rxq_ctrl *rxq_ctrl;
struct mlx5_rxq_data *rxq_data;
if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
RTE_MAX_RXTX_INTR_VEC_ID)
continue;
/**
* Need to access directly the queue to release the reference
* kept in priv_rx_intr_vec_enable().
*/
rxq_data = (*priv->rxqs)[i];
rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
}
rte_intr_free_epoll_fd(intr_handle);
free(intr_handle->intr_vec);
intr_handle->nb_efd = 0;
@@ -1363,16 +1204,30 @@ int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct priv *priv = mlx5_get_priv(dev);
struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
struct mlx5_rxq_data *rxq_data;
struct mlx5_rxq_ctrl *rxq_ctrl;
int ret = 0;
if (!rxq || !rxq_ctrl->channel) {
priv_lock(priv);
rxq_data = (*priv->rxqs)[rx_queue_id];
if (!rxq_data) {
ret = EINVAL;
} else {
mlx5_arm_cq(rxq, rxq->cq_arm_sn);
goto exit;
}
rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
if (rxq_ctrl->irq) {
struct mlx5_rxq_ibv *rxq_ibv;
rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
if (!rxq_ibv) {
ret = EINVAL;
goto exit;
}
mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
}
exit:
priv_unlock(priv);
if (ret)
WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
return -ret;
@@ -1393,25 +1248,345 @@ int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct priv *priv = mlx5_get_priv(dev);
struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
struct mlx5_rxq_data *rxq_data;
struct mlx5_rxq_ctrl *rxq_ctrl;
struct mlx5_rxq_ibv *rxq_ibv = NULL;
struct ibv_cq *ev_cq;
void *ev_ctx;
int ret;
int ret = 0;
if (!rxq || !rxq_ctrl->channel) {
priv_lock(priv);
rxq_data = (*priv->rxqs)[rx_queue_id];
if (!rxq_data) {
ret = EINVAL;
} else {
ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx);
rxq->cq_arm_sn++;
if (ret || ev_cq != rxq_ctrl->cq)
ret = EINVAL;
goto exit;
}
rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
if (!rxq_ctrl->irq)
goto exit;
rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
if (!rxq_ibv) {
ret = EINVAL;
goto exit;
}
ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
if (ret || ev_cq != rxq_ibv->cq) {
ret = EINVAL;
goto exit;
}
rxq_data->cq_arm_sn++;
ibv_ack_cq_events(rxq_ibv->cq, 1);
exit:
if (rxq_ibv)
mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
priv_unlock(priv);
if (ret)
WARN("unable to disable interrupt on rx queue %d",
rx_queue_id);
else
ibv_ack_cq_events(rxq_ctrl->cq, 1);
return -ret;
}
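
Both interrupt handlers follow the same pattern: take a reference before
touching the CQ, drop it afterwards, so the Verbs object cannot be
destroyed while an event is being armed or acknowledged. The enable path
reduces to (a sketch assuming the surrounding context):

if (rxq_ctrl->irq) {
	struct mlx5_rxq_ibv *rxq_ibv =
		mlx5_priv_rxq_ibv_get(priv, rx_queue_id);

	if (!rxq_ibv)
		return -EINVAL;
	mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
	/* Pairs with the get() above; the object stays alive. */
	mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
}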
/**
* Create the Rx queue Verbs object.
*
* @param priv
* Pointer to private structure.
* @param idx
* Queue index in DPDK Rx queue array
*
* @return
* The Verbs object initialised if it can be created.
*/
struct mlx5_rxq_ibv*
mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
{
struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
struct ibv_wq_attr mod;
union {
struct ibv_cq_init_attr_ex cq;
struct ibv_wq_init_attr wq;
struct ibv_cq_ex cq_attr;
} attr;
unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
struct mlx5_rxq_ibv *tmpl;
struct mlx5dv_cq cq_info;
struct mlx5dv_rwq rwq;
unsigned int i;
int ret = 0;
struct mlx5dv_obj obj;
assert(rxq_data);
assert(!rxq_ctrl->ibv);
tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
rxq_ctrl->socket);
if (!tmpl) {
ERROR("%p: cannot allocate verbs resources",
(void *)rxq_ctrl);
goto error;
}
tmpl->rxq_ctrl = rxq_ctrl;
/* Use the entire RX mempool as the memory region. */
tmpl->mr = priv_mr_get(priv, rxq_data->mp);
if (!tmpl->mr) {
tmpl->mr = priv_mr_new(priv, rxq_data->mp);
if (!tmpl->mr) {
ERROR("%p: MR creation failure", (void *)rxq_ctrl);
goto error;
}
}
if (rxq_ctrl->irq) {
tmpl->channel = ibv_create_comp_channel(priv->ctx);
if (!tmpl->channel) {
ERROR("%p: Comp Channel creation failure",
(void *)rxq_ctrl);
goto error;
}
}
attr.cq = (struct ibv_cq_init_attr_ex){
.comp_mask = 0,
};
if (priv->cqe_comp) {
attr.cq.comp_mask |= IBV_CQ_INIT_ATTR_MASK_FLAGS;
attr.cq.flags |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
/*
* For vectorized Rx, it must not be doubled in order to
* make cq_ci and rq_ci aligned.
*/
if (rxq_check_vec_support(rxq_data) < 0)
cqe_n *= 2;
}
tmpl->cq = ibv_create_cq(priv->ctx, cqe_n, NULL, tmpl->channel, 0);
if (tmpl->cq == NULL) {
ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
goto error;
}
DEBUG("priv->device_attr.max_qp_wr is %d",
priv->device_attr.orig_attr.max_qp_wr);
DEBUG("priv->device_attr.max_sge is %d",
priv->device_attr.orig_attr.max_sge);
attr.wq = (struct ibv_wq_init_attr){
.wq_context = NULL, /* Could be useful in the future. */
.wq_type = IBV_WQT_RQ,
/* Max number of outstanding WRs. */
.max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
/* Max number of scatter/gather elements in a WR. */
.max_sge = 1 << rxq_data->sges_n,
.pd = priv->pd,
.cq = tmpl->cq,
.comp_mask =
IBV_WQ_FLAGS_CVLAN_STRIPPING |
0,
.create_flags = (rxq_data->vlan_strip ?
IBV_WQ_FLAGS_CVLAN_STRIPPING :
0),
};
/* By default, FCS (CRC) is stripped by hardware. */
if (rxq_data->crc_present) {
attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
}
#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
if (priv->hw_padding) {
attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
}
#endif
tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
if (tmpl->wq == NULL) {
ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
goto error;
}
/*
* Make sure number of WRs*SGEs match expectations since a queue
* cannot allocate more than "desc" buffers.
*/
if (((int)attr.wq.max_wr !=
((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
(void *)rxq_ctrl,
((1 << rxq_data->elts_n) >> rxq_data->sges_n),
(1 << rxq_data->sges_n),
attr.wq.max_wr, attr.wq.max_sge);
goto error;
}
/* Change queue state to ready. */
mod = (struct ibv_wq_attr){
.attr_mask = IBV_WQ_ATTR_STATE,
.wq_state = IBV_WQS_RDY,
};
ret = ibv_modify_wq(tmpl->wq, &mod);
if (ret) {
ERROR("%p: WQ state to IBV_WQS_RDY failed",
(void *)rxq_ctrl);
goto error;
}
obj.cq.in = tmpl->cq;
obj.cq.out = &cq_info;
obj.rwq.in = tmpl->wq;
obj.rwq.out = &rwq;
ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
if (ret != 0)
goto error;
if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
"it should be set to %u", RTE_CACHE_LINE_SIZE);
goto error;
}
/* Fill the rings. */
rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
(uintptr_t)rwq.buf;
for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
struct rte_mbuf *buf = (*rxq_data->elts)[i];
volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
/* scat->addr must be able to store a pointer. */
assert(sizeof(scat->addr) >= sizeof(uintptr_t));
*scat = (struct mlx5_wqe_data_seg){
.addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
uintptr_t)),
.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
.lkey = tmpl->mr->lkey,
};
}
rxq_data->rq_db = rwq.dbrec;
rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
rxq_data->cq_ci = 0;
rxq_data->rq_ci = 0;
rxq_data->rq_pi = 0;
rxq_data->zip = (struct rxq_zip){
.ai = 0,
};
rxq_data->cq_db = cq_info.dbrec;
rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
/* Update doorbell counter. */
rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
rte_wmb();
*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
rte_atomic32_inc(&tmpl->refcnt);
DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
(void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
return tmpl;
error:
if (tmpl->wq)
claim_zero(ibv_destroy_wq(tmpl->wq));
if (tmpl->cq)
claim_zero(ibv_destroy_cq(tmpl->cq));
if (tmpl->channel)
claim_zero(ibv_destroy_comp_channel(tmpl->channel));
if (tmpl->mr)
priv_mr_release(priv, tmpl->mr);
return NULL;
}
/**
* Get an Rx queue Verbs object.
*
* @param priv
* Pointer to private structure.
* @param idx
* Queue index in DPDK Rx queue array
*
* @return
* The Verbs object if it exists.
*/
struct mlx5_rxq_ibv*
mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
{
struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
struct mlx5_rxq_ctrl *rxq_ctrl;
if (idx >= priv->rxqs_n)
return NULL;
if (!rxq_data)
return NULL;
rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
if (rxq_ctrl->ibv) {
priv_mr_get(priv, rxq_data->mp);
rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
(void *)rxq_ctrl->ibv,
rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
}
return rxq_ctrl->ibv;
}
/**
* Release an Rx verbs queue object.
*
* @param priv
* Pointer to private structure.
* @param rxq_ibv
* Verbs Rx queue object.
*
* @return
* 0 on success, errno value on failure.
*/
int
mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
{
int ret;
assert(rxq_ibv);
assert(rxq_ibv->wq);
assert(rxq_ibv->cq);
assert(rxq_ibv->mr);
ret = priv_mr_release(priv, rxq_ibv->mr);
if (!ret)
rxq_ibv->mr = NULL;
DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
(void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
rxq_free_elts(rxq_ibv->rxq_ctrl);
claim_zero(ibv_destroy_wq(rxq_ibv->wq));
claim_zero(ibv_destroy_cq(rxq_ibv->cq));
if (rxq_ibv->channel)
claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
LIST_REMOVE(rxq_ibv, next);
rte_free(rxq_ibv);
return 0;
}
return EBUSY;
}
/**
* Verify the Verbs Rx queue list is empty
*
* @param priv
* Pointer to private structure.
*
* @return the number of objects not released.
*/
int
mlx5_priv_rxq_ibv_verify(struct priv *priv)
{
int ret = 0;
struct mlx5_rxq_ibv *rxq_ibv;
LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
(void *)rxq_ibv);
++ret;
}
return ret;
}
/**
* Return true if a single reference exists on the object.
*
* @param priv
* Pointer to private structure.
* @param rxq_ibv
* Verbs Rx queue object.
*/
int
mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
{
(void)priv;
assert(rxq_ibv);
return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
}

drivers/net/mlx5/mlx5_rxtx.h

@@ -134,15 +134,24 @@ struct mlx5_rxq_data {
uint8_t cq_arm_sn; /* CQ arm seq number. */
} __rte_cache_aligned;
/* Verbs Rx queue elements. */
struct mlx5_rxq_ibv {
LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
rte_atomic32_t refcnt; /* Reference counter. */
struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_wq *wq; /* Work Queue. */
struct ibv_comp_channel *channel;
struct mlx5_mr *mr; /* Memory Region (for mp). */
};
/* RX queue control descriptor. */
struct mlx5_rxq_ctrl {
struct priv *priv; /* Back pointer to private data. */
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_wq *wq; /* Work Queue. */
struct mlx5_mr *mr; /* Memory Region (for mp). */
struct ibv_comp_channel *channel;
unsigned int socket; /* CPU socket ID for allocations. */
struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
struct mlx5_rxq_data rxq; /* Data path structure. */
unsigned int socket; /* CPU socket ID for allocations. */
unsigned int irq:1; /* Whether IRQ is enabled. */
};
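
With this layout, mlx5_rxq_ctrl keeps only DPDK-side state plus the ibv
pointer; control-path code reaches Verbs handles through it, as the
mlx5_vlan.c hunk below shows. The navigation pattern in short:

/* From the data-path structure to the Verbs work queue. */
struct mlx5_rxq_ctrl *rxq_ctrl =
	container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
struct ibv_wq *wq = rxq_ctrl->ibv->wq; /* Formerly rxq_ctrl->wq. */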
/* Hash RX queue types. */
@@ -310,6 +319,11 @@ int priv_rx_intr_vec_enable(struct priv *priv);
void priv_rx_intr_vec_disable(struct priv *priv);
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t);
struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
int mlx5_priv_rxq_ibv_verify(struct priv *);
/* mlx5_txq.c */
@@ -347,7 +361,6 @@ uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
/* mlx5_mr.c */
struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
void mlx5_txq_mp2mr_iter(struct rte_mempool *, void *);
struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
unsigned int);

drivers/net/mlx5/mlx5_vlan.c

@@ -154,7 +154,7 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
.flags = vlan_offloads,
};
err = ibv_modify_wq(rxq_ctrl->wq, &mod);
err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
if (err) {
ERROR("%p: failed to modified stripping mode: %s",
(void *)priv, strerror(err));